libplacebo-v4.192.1/.github/FUNDING.yml

github: haasn
patreon: haasn
open_collective: haasn

libplacebo-v4.192.1/.gitignore

/build*
/tags
/TAGS
/demos/3rdparty
*.exe
*.o

libplacebo-v4.192.1/.gitlab-ci.yml

stages:
  - compile
  - test
  - sanitize

linux:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: compile
  tags:
    - docker
    - amd64
  script:
    - meson build --buildtype release --werror -Dtests=true -Dshaderc=enabled -Dglslang=enabled
    - ninja -C build

static:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: compile
  tags:
    - docker
    - amd64
  script:
    - meson build --buildtype release --default-library static --werror -Dshaderc=enabled -Dglslang=enabled
    - ninja -C build

win32:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: compile
  tags:
    - docker
    - amd64
  script:
    - meson build --buildtype release --werror -Dtests=true --libdir lib --cross-file /opt/crossfiles/i686-w64-mingw32.meson
    - ninja -C build
    - cd build && meson test -t 5 -v --num-processes=1

win64:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: compile
  tags:
    - docker
    - amd64
  script:
    - meson build --buildtype release --werror -Dtests=true --libdir lib --cross-file /opt/crossfiles/x86_64-w64-mingw32.meson
    - ninja -C build
    - cd build && meson test -t 5 -v --num-processes=1

aarch64:
  image: registry.videolan.org/libplacebo-ubuntu-focal-aarch64:20211020160908
  stage: compile
  tags:
    - docker
    - aarch64
  script:
    - meson build --buildtype release --werror -Dtests=true
    - ninja -C build
    - cd build && meson test -t 5 -v --num-processes=1

macos:
  stage: compile
  tags:
    - amd64
    - catalina
  script:
    - meson build --buildtype release -Ddefault_library=both -Dtests=true --werror
    - ninja -C build
    - cd build && meson test -t 5 -v --num-processes=1

scan:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: compile
  tags:
    - docker
    - amd64
  script:
    - env CC=clang meson build --buildtype debugoptimized --werror -Dtests=true -Ddemos=true -Dbench=true -Dshaderc=enabled -Dglslang=enabled
    - ninja -C build scan-build

llvmpipe:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: test
  tags:
    - docker
    - amd64
  script:
    - meson build --buildtype release --werror -Dtests=true -Ddemos=false -Dc_args='-DCI_ALLOW_SW' -Dshaderc=enabled -Dglslang=enabled
    - ninja -C build
    - cd build && meson test -t 5 -v --num-processes=1

gpu:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: test
  tags:
    - gpu
  script:
    - meson build --buildtype release --werror -Dtests=true -Ddemos=false -Dshaderc=disabled -Db_coverage=true
    - ninja -C build
    - vulkaninfo
    - cd build && meson test -t 5 -v --num-processes=1
    - ninja coverage-html
    - mv meson-logs/coveragereport ../coverage
    - ninja coverage-xml
    - grep -Eo 'line-rate="[^"]+"' meson-logs/coverage.xml | head -n 1 | grep -Eo '[0-9.]+' | awk '{ print "coverage:", $1 * 100 } '
  coverage: '/^coverage: (\d+.\d+)$/'
  artifacts:
    expose_as: 'Coverage HTML report'
    paths:
      - coverage/
    reports:
      cobertura: build/meson-logs/coverage.xml

asan:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: sanitize
  tags:
    - gpu
  variables:
    ASAN_OPTIONS: 'detect_leaks=0'
  script:
    - meson build --buildtype debugoptimized -Dtests=true -Ddemos=false -Dshaderc=disabled -Db_sanitize=address
    - ninja -C build
    - cd build && time meson test -t 5 -v --num-processes=1

msan:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: sanitize
  tags:
    - gpu
  variables:
    MSAN_OPTIONS: 'exitcode=1'
  script:
    - env CC=clang meson build --buildtype debugoptimized -Dtests=true -Ddemos=false -Dc_args='-DMSAN' -Dglslang=disabled -Db_sanitize=memory -Db_lundef=false
    - ninja -C build
    - cd build && time meson test -t 5 -v --num-processes=1

ubsan:
  image: registry.videolan.org/libplacebo-ubuntu-focal:20210412100944
  stage: sanitize
  tags:
    - gpu
  variables:
    UBSAN_OPTIONS: 'print_stacktrace=1:halt_on_error=1'
  script:
    - env CC=clang meson build --buildtype debugoptimized -Dtests=true -Ddemos=false -Dc_args='-fPIE' -Dglslang=disabled -Db_sanitize=undefined -Db_lundef=false
    - ninja -C build
    - cd build && time meson test -t 5 -v --num-processes=1

libplacebo-v4.192.1/.gitmodules

[submodule "demos/3rdparty/nuklear"]
	path = demos/3rdparty/nuklear
	url = https://github.com/Immediate-Mode-UI/Nuklear.git

libplacebo-v4.192.1/LICENSE

                  GNU LESSER GENERAL PUBLIC LICENSE
                       Version 2.1, February 1999

 Copyright (C) 1991, 1999 Free Software Foundation, Inc.
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

[This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.]

                            Preamble

The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users.

This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below.

When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things.

To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. 
Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. 
d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. 
A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. 
A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. 
Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. 
For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.

                            NO WARRANTY

15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

                     END OF TERMS AND CONDITIONS

libplacebo-v4.192.1/README.md

# libplacebo

[![gitlab-ci badge](https://code.videolan.org/videolan/libplacebo/badges/master/pipeline.svg)](https://code.videolan.org/videolan/libplacebo/pipelines)
[![gitlab-ci coverage](https://code.videolan.org/videolan/libplacebo/badges/master/coverage.svg)](https://code.videolan.org/videolan/libplacebo/-/jobs/artifacts/master/file/coverage/index.html?job=test-gpu)
[![Backers on Open Collective](https://opencollective.com/libplacebo/backers/badge.svg)](#backers)
[![Sponsors on Open Collective](https://opencollective.com/libplacebo/sponsors/badge.svg)](#sponsors)
[![PayPal](https://img.shields.io/badge/donate-PayPal-blue.svg?logo=paypal)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=SFJUTMPSZEAHC)
[![Patreon](https://img.shields.io/badge/pledge-Patreon-red.svg?logo=patreon)](https://www.patreon.com/haasn)

**libplacebo** is, in a nutshell, the core rendering algorithms and ideas of [mpv](https://mpv.io) rewritten as an independent library. As of today, libplacebo contains a large assortment of video processing shaders, focusing on both quality and performance. These include features such as the following:

- High-quality, optimized **upscaling and downscaling** including support for polar filters ("Jinc"), anti-aliasing, anti-ringing and gamma correct scaling.

- **Color management** and format conversions for a wide variety of HDR or wide gamut color spaces. This includes support for ICC profiles, ITU-R BT.1886 emulation, colorimetrically accurate clipping, custom 1D/3D LUTs, scene-referred OOTFs (such as HLG), constant luminance formats including ICtCp and a variety of film industry formats ranging from XYZ to Sony's S-Log or Panasonic's V-Gamut.

- Tunable **debanding** shader. This is based on flash3kyuu, expanded to provide high quality by combining multiple debanding passes.
- Dynamic **HDR tone mapping**, including shaders for real-time peak and scene-change detection, chroma-preserving (luma-only) tone mapping, highlight desaturation, dynamic exposure control and a variety of industry-standard EETFs including BT.2390.

- High performance **film grain synthesis** for AV1 and H.274, allowing media players to offload this part of decoding from the CPU to the GPU.

- A **pluggable, extensible custom shader syntax**, equivalent to an improved version of [mpv's `.hook` syntax](https://mpv.io/manual/master/#options-glsl-shaders). This can be used to arbitrarily extend the range of custom shaders to include popular user shaders like RAVU, FSRCNNX, or Anime4K. See the [mpv wiki on user scripts](https://github.com/mpv-player/mpv/wiki/User-Scripts#user-shaders) for more information.

Every attempt was made to provide these features at a **high level of abstraction**, taking away all the messy details of GPU programming, color spaces, obscure subsampling modes, image metadata manipulation, and so on. Expert-level functionality is packed into easy-to-use functions like `pl_frame_from_avframe` and `pl_render_image`.

libplacebo currently supports Vulkan (including MoltenVK), OpenGL, and Direct3D 11. It contains backwards compatibility code for very old versions of GLSL down to GLES 2.0 and OpenGL 1.3 and very old Direct3D feature levels down to 9_1.

### Examples

This screenshot from the included [plplay demo program](./demos/plplay.c) highlights just some of the features supported by the libplacebo rendering code, all of which are adjustable dynamically during video playback.

![plplay settings](./demos/plplay-screenshot.png)

### History

This project grew out of an interest to accomplish the following goals:

- Clean up mpv's internal [RA](#tier-1-rendering-abstraction) API and make it reusable for other projects, as a general high-level backend-agnostic graphics API wrapper.

- Provide a standard library of useful GPU-accelerated image processing primitives based on GLSL, so projects like media players or browsers can use them without incurring a heavy dependency on `libmpv`.

- Rewrite core parts of mpv's GPU-accelerated video renderer on top of redesigned abstractions, in order to modernize it and allow supporting more features.

It has since been adopted by [VLC](https://www.videolan.org/vlc/) as their optional Vulkan-based video output path, and is provided as a Vulkan-based video filter in the FFmpeg project.

## API Overview

The public API of libplacebo is currently split up into the following components, the header files (and documentation) for which are available inside the [`src/include/libplacebo`](src/include/libplacebo) directory.

The API is available in different "tiers", representing levels of abstraction inside libplacebo. The APIs in higher tiers depend on those in lower tiers. Which tier is used by a user depends on how much power/control they want over the actual rendering. The low-level tiers are more suitable for big projects that need strong control over the entire rendering pipeline; whereas the high-level tiers are more suitable for smaller or simpler projects that want libplacebo to take care of everything.

### Tier 0 (logging, raw math primitives)

- `colorspace.h`: A collection of enums and structs for describing color spaces, as well as a collection of helper functions for computing various color space transformation matrices.

- `common.h`: A collection of miscellaneous utility types and macros that are shared among multiple subsystems.
  Usually does not need to be included directly.

- `log.h`: Logging subsystem.

- `config.h`: Macros defining information about the way libplacebo was built, including the version strings and compiled-in features/dependencies. Usually does not need to be included directly. May be useful for feature tests.

- `dither.h`: Some helper functions for generating various noise and dithering matrices. Might be useful for somebody else.

- `filters.h`: A collection of reusable reconstruction filter kernels, which can be used for scaling. The generated weights arrays are semi-tailored to the needs of libplacebo, but may be useful to somebody else regardless. Also contains the structs needed to define a filter kernel for the purposes of libplacebo's upscaling routines.

- `tone_mapping.h`: A collection of tone mapping functions, used for conversions between HDR and SDR content.

The API functions in this tier are either used throughout the program (context, common etc.) or are low-level implementations of filter kernels, color space conversion logic etc.; which are entirely independent of GLSL and even the GPU in general.

### Tier 1 (rendering abstraction)

- `gpu.h`: Exports the GPU abstraction API used by libplacebo internally.

- `swapchain.h`: Exports an API for wrapping platform-specific swapchains and other display APIs. This is the API used to actually queue up rendered frames for presentation (e.g. to a window or display device).

- `vulkan.h`: GPU API implementation based on Vulkan.

- `opengl.h`: GPU API implementation based on OpenGL.

- `d3d11.h`: GPU API implementation based on Direct3D 11.

- `dummy.h`: Dummy GPU API (interfaces with CPU only, no shader support)

As part of the public API, libplacebo exports a middle-level abstraction for dealing with GPU objects and state. Basically, this is the API libplacebo uses internally to wrap OpenGL, Vulkan, Direct3D etc. into a single unifying API subset that abstracts away state, messy details, synchronization etc. into a fairly high-level API suitable for libplacebo's image processing tasks. It's made public both because it constitutes part of the public API of various image processing functions, but also in the hopes that it will be useful for other developers of GPU-accelerated image processing software.

### Tier 2 (GLSL generating primitives)

- `shaders.h`: The low-level interface to shader generation. This can be used to generate GLSL stubs suitable for inclusion in other programs, as part of larger shaders. For example, a program might use this interface to generate a specialized tone-mapping function for performing color space conversions, then call that from their own fragment shader code. This abstraction has an optional dependency on `gpu.h`, but can also be used independently from it.

In addition to this low-level interface, there are several available shader routines which libplacebo exports:

- `shaders/colorspace.h`: Shader routines for decoding and transforming colors, tone mapping, dithering, and so forth.

- `shaders/custom.h`: Allows directly ingesting custom GLSL logic into the `pl_shader` abstraction, either as bare GLSL or in [mpv .hook format](https://mpv.io/manual/master/#options-glsl-shaders).

- `shaders/film_grain.h`: Film grain synthesis shaders for AV1 and H.274.

- `shaders/icc.h`: Shader for ICC profile based color management.

- `shaders/lut.h`: Code for applying arbitrary 1D/3D LUTs.

- `shaders/sampling.h`: Shader routines for various algorithms that sample from images, such as debanding and scaling.
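As a rough illustration of the Tier 2 workflow, the sketch below allocates a `pl_shader`, requests an HDR10-to-sRGB tone-mapping routine, and prints the generated GLSL for inclusion in a larger shader. The signatures (in particular `pl_shader_color_map` and its parameter order) are paraphrased from memory of the v4.x headers and should be verified against `shaders.h` and `shaders/colorspace.h`; in a real program a `pl_gpu` from Tier 1 would usually be attached via `pl_shader_params` so that all features are available.

```c
#include <stdio.h>

#include <libplacebo/log.h>
#include <libplacebo/shaders.h>
#include <libplacebo/shaders/colorspace.h>

// Hedged sketch: generate a standalone GLSL tone-mapping snippet.
// Signatures assumed from the v4.x headers; check before relying on them.
static void print_tone_map_glsl(pl_log log)
{
    // NULL params -> library defaults; no pl_gpu attached, which limits
    // some features (e.g. LUT-backed methods) but is fine for plain GLSL.
    pl_shader sh = pl_shader_alloc(log, NULL);

    // Map HDR10 (PQ / BT.2020) input to an SDR sRGB target. NULL params
    // and NULL state pick the defaults and skip stateful peak detection.
    pl_shader_color_map(sh, NULL, pl_color_space_hdr10, pl_color_space_srgb,
                        NULL, false);

    const struct pl_shader_res *res = pl_shader_finalize(sh);
    if (res)
        printf("%s\n", res->glsl); // GLSL snippet to splice into your shader

    pl_shader_free(&sh);
}
```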
### Tier 3 (shader dispatch)

- `dispatch.h`: A higher-level interface to the `pl_shader` system, based on `gpu.h`. This dispatch mechanism generates+executes complete GLSL shaders, subject to the constraints and limitations of the underlying GPU.

This shader dispatch mechanism is designed to be combined with the shader processing routines exported by `shaders/*.h`, but takes care of the low-level translation of the resulting `pl_shader_res` objects into legal GLSL. It also takes care of resource binding, shader input placement, as well as shader caching and resource pooling; and makes sure all generated shaders have unique identifiers (so they can be freely merged together).

### Tier 4 (high level renderer)

- `renderer.h`: A high-level renderer which combines the shader primitives and dispatch mechanism into a fully-fledged rendering pipeline that takes raw texture data and transforms it into the desired output image.

- `utils/frame_queue.h`: A high-level frame queuing abstraction. This API can be used to interface with a decoder (or other source of frames), and takes care of translating timestamped frames into a virtual stream of presentation events suitable for use with `renderer.h`, including any extra context required for frame interpolation (`pl_frame_mix`).

- `utils/upload.h`: A high-level helper for uploading generic data in some user-described format to a plane texture suitable for use with `renderer.h`. These helpers essentially take care of picking/mapping a good image format supported by the GPU. (Note: Eventually, this function will also support on-CPU conversions to a different format where necessary, but for now, it will just fail)

- `utils/dav1d.h`: High level helper for translating between Dav1dPicture and libplacebo's `pl_frame`. (Single header library)

- `utils/libav.h`: High-level helpers for interoperation between libplacebo and FFmpeg's libav* abstractions. (Single header library)

This is the "primary" interface to libplacebo, and the one most users will be interested in. It takes care of internal details such as degrading to simpler algorithms depending on the hardware's capabilities, combining the correct sequence of colorspace transformations and shader passes in order to get the best overall image quality, and so forth.

## Authors

libplacebo was founded and primarily developed by Niklas Haas ([@haasn](https://github.com/haasn)), but it would not be possible without the contributions of others. Special note also goes out to wm4, the developer of mpv, whose ideas helped shape the foundation of the shader dispatch system.

This library also includes various excerpts from mpv, in particular the filter kernel code. For a full list of past contributors to mpv, see the [mpv authorship page](https://github.com/mpv-player/mpv/graphs/contributors)

[![contributor list](https://opencollective.com/libplacebo/contributors.svg?width=890&button=false)](https://github.com/haasn/libplacebo/graphs/contributors)

### Backers

Thank you to all our backers! 🙏 [[Become a backer](https://opencollective.com/libplacebo#backer)]

[![backer list](https://opencollective.com/libplacebo/backers.svg?width=890)](https://opencollective.com/libplacebo#backers)

### Sponsors

Support this project by becoming a sponsor. Your logo will show up here with a link to your website.
[[Become a sponsor](https://opencollective.com/libplacebo#sponsor)]

[![sponsor 0](https://opencollective.com/libplacebo/sponsor/0/avatar.svg)](https://opencollective.com/libplacebo/sponsor/0/website)
[![sponsor 1](https://opencollective.com/libplacebo/sponsor/1/avatar.svg)](https://opencollective.com/libplacebo/sponsor/1/website)
[![sponsor 2](https://opencollective.com/libplacebo/sponsor/2/avatar.svg)](https://opencollective.com/libplacebo/sponsor/2/website)
[![sponsor 3](https://opencollective.com/libplacebo/sponsor/3/avatar.svg)](https://opencollective.com/libplacebo/sponsor/3/website)
[![sponsor 4](https://opencollective.com/libplacebo/sponsor/4/avatar.svg)](https://opencollective.com/libplacebo/sponsor/4/website)
[![sponsor 5](https://opencollective.com/libplacebo/sponsor/5/avatar.svg)](https://opencollective.com/libplacebo/sponsor/5/website)
[![sponsor 6](https://opencollective.com/libplacebo/sponsor/6/avatar.svg)](https://opencollective.com/libplacebo/sponsor/6/website)
[![sponsor 7](https://opencollective.com/libplacebo/sponsor/7/avatar.svg)](https://opencollective.com/libplacebo/sponsor/7/website)
[![sponsor 8](https://opencollective.com/libplacebo/sponsor/8/avatar.svg)](https://opencollective.com/libplacebo/sponsor/8/website)
[![sponsor 9](https://opencollective.com/libplacebo/sponsor/9/avatar.svg)](https://opencollective.com/libplacebo/sponsor/9/website)

### License

libplacebo is currently available under the terms of the LGPLv2.1 (or later) license. However, it's possible to release it under a more permissive license (e.g. BSD2) if a use case emerges.

Please open an issue if you have a use case for a BSD2-licensed libplacebo.

## Installing

### Gentoo

An ebuild is available as `media-libs/libplacebo` in the gentoo repository.

### Building from source

libplacebo is built using the [meson build system](http://mesonbuild.com/). You can build the project using the following steps:

```bash
$ DIR=./build
$ meson $DIR
$ ninja -C$DIR
```

To rebuild the project on changes, re-run `ninja -Cbuild`. If you wish to install the build products to the configured prefix (typically `/usr/local/`), you can run `ninja -Cbuild install`. Note that this is normally ill-advised except for developers who know what they're doing. Regular users should rely on distro packages.

### Dependencies

In principle, libplacebo has no mandatory dependencies - only optional ones. However, to get a useful version of libplacebo, you most likely want to build with support for either `opengl`, `vulkan` or `d3d11`. libplacebo built without these can still be used (e.g. to generate GLSL shaders such as the ones used in VLC), but the usefulness is severely impacted since most components will be missing, impaired or otherwise not functional.

A full list of optional dependencies each feature requires:

- **glslang**: `glslang` + its related libraries (e.g. `libSPIRV.so`)
- **lcms**: `liblcms2`
- **opengl**: `libepoxy`
- **shaderc**: `libshaderc`
- **vulkan**: `libvulkan`, `python3-mako`

#### Vulkan support

Because the vulkan backend relies on code generation at compile time, `python3-mako` is a hard dependency of the build system. In addition to this, the path to the Vulkan registry (`vk.xml`) must be locatable, ideally by explicitly providing it via the `-Dvulkan-registry=/path/to/vk.xml` option, unless it can be found in one of the built-in hard-coded locations.
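For example, a configure line that enables Vulkan and points the build at an explicitly chosen registry might look like the following (the registry path shown is only a typical location and will vary by system):

```bash
$ meson $DIR -Dvulkan=enabled -Dvulkan-registry=/usr/share/vulkan/registry/vk.xml
$ ninja -C$DIR
```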
### Configuring

To get a list of configuration options supported by libplacebo, after running `meson $DIR` you can run `meson configure $DIR`, e.g.:

```bash
$ meson $DIR
$ meson configure $DIR
```

If you want to disable a component, for example Vulkan support, you can explicitly set it to `false`, i.e.:

```bash
$ meson configure $DIR -Dvulkan=disabled -Dshaderc=disabled
$ ninja -C$DIR
```

### Testing

To enable building and executing the tests, you need to build with `tests` enabled, i.e.:

```bash
$ meson configure $DIR -Dtests=true
$ ninja -C$DIR test
```

### Benchmarking

A naive benchmark suite is provided as an extra test case, disabled by default (due to the high execution time required). To enable it, use the `bench` option:

```bash
$ meson configure $DIR -Dbench=true
$ meson test -C$DIR benchmark --verbose
```

## Using

For a full documentation of the API, refer to the above [API Overview](#api-overview) as well as the [public header files](src/include/libplacebo). You can find additional examples of how to use the various components in the [demo programs](demos) as well as in the [unit tests](src/tests).

libplacebo-v4.192.1/compile

#!/bin/sh
DIR=./build
[ -d $DIR ] || meson $DIR
ninja -C$DIR

libplacebo-v4.192.1/demos/LICENSE

Creative Commons Legal Code

CC0 1.0 Universal

CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER.

Statement of Purpose

The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").

Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; ii. moral rights retained by the original author(s) and/or performer(s); iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; v. rights protecting the extraction, dissemination, use and reuse of data in a Work; vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.

4. Limitations and Disclaimers.

a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.

b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.

c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.

d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.

libplacebo-v4.192.1/demos/colors.c

/* Simplistic demo that just makes the window colorful, including alpha
 * transparency if supported by the windowing system.
 *
 * License: CC0 / Public Domain
 */

#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "common.h"
#include "utils.h"
#include "window.h"

static pl_log logger;
static struct window *win;

static void uninit(int ret)
{
    window_destroy(&win);
    pl_log_destroy(&logger);
    exit(ret);
}

int main(int argc, char **argv)
{
    logger = pl_log_create(PL_API_VER, pl_log_params(
        .log_cb = pl_log_color,
        .log_level = PL_LOG_DEBUG,
    ));

    win = window_create(logger, &(struct window_params) {
        .title = "colors demo",
        .width = 640,
        .height = 480,
        .alpha = true,
    });
    if (!win)
        uninit(1);

    double ts_start, ts;
    if (!utils_gettime(&ts_start)) {
        uninit(1);
    }

    while (!win->window_lost) {
        struct pl_swapchain_frame frame;
        bool ok = pl_swapchain_start_frame(win->swapchain, &frame);
        if (!ok) {
            // Something unexpected happened, perhaps the window is not
            // visible? Wait for events and try again.
            window_poll(win, true);
            continue;
        }

        if (!utils_gettime(&ts))
            uninit(1);

        const double period = 10.; // in seconds
        float secs = fmod(ts - ts_start, period);
        float pos = 2 * M_PI * secs / period;
        float alpha = (cosf(pos) + 1.0) / 2.0;

        assert(frame.fbo->params.blit_dst);
        pl_tex_clear(win->gpu, frame.fbo, (float[4]) {
            alpha * (sinf(2 * pos + 0.0) + 1.0) / 2.0,
            alpha * (sinf(2 * pos + 2.0) + 1.0) / 2.0,
            alpha * (sinf(2 * pos + 4.0) + 1.0) / 2.0,
            alpha,
        });

        ok = pl_swapchain_submit_frame(win->swapchain);
        if (!ok) {
            fprintf(stderr, "libplacebo: failed submitting frame!\n");
            uninit(3);
        }

        pl_swapchain_swap_buffers(win->swapchain);
        window_poll(win, false);
    }

    uninit(0);
}

libplacebo-v4.192.1/demos/common.h

#pragma once

#include
#include
#include
#include
#include
#include
#include "config_demos.h"

libplacebo-v4.192.1/demos/lena.jpg (binary JPEG image data omitted)
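The demo programs above are only compiled when demos are enabled at configure time (the same `-Ddemos` option used in the CI configuration). A minimal sketch of building and running the colors demo, assuming a suitable windowing backend is available and that the binary keeps the source file's name under the build directory (which may differ):

```bash
$ meson build -Ddemos=true
$ ninja -C build
$ ./build/demos/colors   # binary name/path assumed; check the demos/ build output
```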
libplacebo-v4.192.1/demos/meson.build000066400000000000000000000066001417677245700174510ustar00rootroot00000000000000glfw = dependency('glfw3', required: false)
sdl = dependency('sdl2', required: false)
sdl_image = dependency('SDL2_image', required: false)
pthread = dependency('threads', required: false)

ffmpeg_deps = [
  dependency('libavcodec', required: false),
  dependency('libavformat', required: false),
  dependency('libavutil', required: false),
]

ffmpeg_found = true
foreach dep : ffmpeg_deps
  ffmpeg_found = ffmpeg_found and dep.found()
endforeach

nuklear_inc = include_directories('./3rdparty/nuklear')
nuklear_found = cc.has_header('nuklear.h', include_directories: nuklear_inc)

if nuklear_found
  nuklear_lib = static_library('nuklear',
    include_directories: nuklear_inc,
    c_args: ['-O2', '-Wno-missing-prototypes'],
    dependencies: [ libplacebo, libm ],
    sources: 'ui.c',
  )
  nuklear = declare_dependency(
    include_directories: nuklear_inc,
    link_with: nuklear_lib,
  )
else
  warning('Nuklear was not found in `demos/3rdparty`. Please run ' +
          '`git submodule update --init` followed by `meson --wipe`.')
endif

conf_demos = configuration_data()
conf_demos.set('HAVE_NUKLEAR', nuklear_found)

apis = []

# Enable all supported combinations of API and windowing system
if glfw.found()
  if comps.has('vulkan')
    conf_demos.set('HAVE_GLFW_VULKAN', true)
    apis += static_library('glfw-vk',
      dependencies: [libplacebo, libm, glfw, vulkan_headers],
      sources: 'window_glfw.c',
      c_args: '-DUSE_VK',
    )
  endif
  if comps.has('opengl')
    conf_demos.set('HAVE_GLFW_OPENGL', true)
    apis += static_library('glfw-gl',
      dependencies: [libplacebo, glfw],
      sources: 'window_glfw.c',
      c_args: '-DUSE_GL',
    )
  endif
  if d3d11.found()
    conf_demos.set('HAVE_GLFW_D3D11', true)
    apis += static_library('glfw-d3d11',
      dependencies: [libplacebo, glfw],
      sources: 'window_glfw.c',
      c_args: '-DUSE_D3D11',
    )
  endif
endif

if sdl.found()
  if comps.has('vulkan')
    conf_demos.set('HAVE_SDL_VULKAN', true)
    apis += static_library('sdl-vk',
      dependencies: [libplacebo, sdl, vulkan_headers],
      sources: 'window_sdl.c',
      c_args: '-DUSE_VK',
    )
  endif
  if comps.has('opengl')
    conf_demos.set('HAVE_SDL_OPENGL', true)
    apis += static_library('sdl-gl',
      dependencies: [libplacebo, sdl],
      sources: 'window_sdl.c',
      c_args: '-DUSE_GL',
    )
  endif
endif

configure_file(
  output: 'config_demos.h',
  configuration: conf_demos,
)

if apis.length() == 0
  warning('Demos enabled but no supported combination of windowing system ' +
          'and graphical APIs was found. Demo programs require either GLFW or ' +
          'SDL and either Vulkan or OpenGL to function.')
else
  dep = declare_dependency(
    dependencies: libplacebo,
    sources: ['window.c', 'utils.c'],
    link_with: apis,
  )

  # Graphical demo programs
  executable('colors', 'colors.c',
    dependencies: [ dep, libm ],
  )

  if sdl_image.found()
    executable('sdlimage', 'sdlimage.c',
      dependencies: [ dep, sdl_image ],
    )
  endif

  if ffmpeg_found
    plplay_deps = [ dep, pthread] + ffmpeg_deps
    if nuklear_found
      plplay_deps += nuklear
    endif
    executable('plplay', 'plplay.c',
      dependencies: plplay_deps,
      install: true,
    )
  endif
endif

# Headless video filtering demo
if vulkan.found()
  executable('video-filtering', 'video-filtering.c',
    dependencies: [ libplacebo, vulkan ],
    c_args: '-O2',
  )
endif
libplacebo-v4.192.1/demos/plplay-screenshot.png000066400000000000000000001365151417677245700215020ustar00rootroot00000000000000[binary PNG image data omitted]
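plplay.c below hands decoded AVFrames to the renderer through a pl_queue. In condensed form, the handoff looks like the following sketch (an editor's illustration, not part of the tarball; `queue`, `pts`, `avframe`, `target_pts` and the map/unmap/discard callbacks are assumed to be set up as in plplay.c):

    // Decoder thread: wrap each decoded frame and push it into the queue
    pl_queue_push_block(queue, UINT64_MAX, &(struct pl_source_frame) {
        .pts        = pts,           // presentation time in seconds
        .map        = map_frame,     // turns the AVFrame into a pl_frame
        .unmap      = unmap_frame,
        .discard    = discard_frame, // called if the frame is dropped unused
        .frame_data = avframe,
    });

    // Render thread: ask the queue for the frame mix covering the target PTS
    struct pl_frame_mix mix;
    struct pl_queue_params qparams = { .pts = target_pts, .timeout = UINT64_MAX };
    switch (pl_queue_update(queue, &mix, &qparams)) {
    case PL_QUEUE_OK:   /* render `mix`, e.g. with pl_render_image_mix() */ break;
    case PL_QUEUE_MORE: /* not enough frames queued yet */                  break;
    case PL_QUEUE_EOF:  /* stream ended */                                  break;
    case PL_QUEUE_ERR:  /* fatal error */                                   break;
    }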
libplacebo-v4.192.1/demos/plplay.c000066400000000000000000001544371417677245700167650ustar00rootroot00000000000000/* Very basic video player based on ffmpeg. All it does is render a single * video stream to completion, and then exits. It exits on most errors, rather * than gracefully trying to recreate the context. * * The timing code is also rather naive, due to the current lack of * presentation feedback. That being said, an effort is made to time the video * stream to the system clock, using frame mixing for mismatches. * * License: CC0 / Public Domain */ #include #include #include #include #include #include #include #include "common.h" #include "utils.h" #include "window.h" #ifdef HAVE_NUKLEAR #include "ui.h" #else struct ui; static void ui_destroy(struct ui **ui) {} static bool ui_draw(struct ui *ui, const struct pl_swapchain_frame *frame) { return true; }; #endif #include #include #include #include #define MAX_FRAME_PASSES 256 #define MAX_BLEND_FRAMES 8 struct pass_info { struct pl_dispatch_info pass; char *name; }; struct plplay { struct window *win; struct ui *ui; // libplacebo pl_log log; pl_renderer renderer; pl_queue queue; // libav* AVFormatContext *format; AVCodecContext *codec; const AVStream *stream; // points to first video stream of `format` pthread_t decoder_thread; bool decoder_thread_created; bool exit_thread; // settings / ui state const struct pl_filter_preset *upscaler, *downscaler, *frame_mixer; struct pl_render_params params; struct pl_deband_params deband_params; struct pl_sigmoid_params sigmoid_params; struct pl_color_adjustment color_adjustment; struct pl_peak_detect_params peak_detect_params; struct pl_color_map_params color_map_params; struct pl_dither_params dither_params; struct pl_cone_params cone_params; struct pl_color_space target_color; struct pl_color_repr target_repr; struct pl_icc_profile target_icc; char *target_icc_name; pl_rotation target_rot; bool target_override; bool levels_override; bool ignore_dovi; // custom shaders const struct pl_hook **shader_hooks; char **shader_paths; size_t shader_num; size_t shader_size; // pass metadata struct pass_info blend_info[MAX_BLEND_FRAMES]; struct pass_info frame_info[MAX_FRAME_PASSES]; int num_frame_passes; }; static void uninit(struct plplay *p) { if (p->decoder_thread_created) { p->exit_thread = true; pl_queue_push(p->queue, NULL); // Signal EOF to wake up thread pthread_join(p->decoder_thread, NULL); } pl_queue_destroy(&p->queue); pl_renderer_destroy(&p->renderer); for (int i = 0; i < p->shader_num; i++) { pl_mpv_user_shader_destroy(&p->shader_hooks[i]); free(p->shader_paths[i]); } free(p->shader_hooks); free(p->shader_paths); free(p->target_icc_name); av_file_unmap((void *) p->target_icc.data, p->target_icc.len); // Free this before destroying the window to release associated GPU buffers avcodec_free_context(&p->codec); avformat_free_context(p->format); ui_destroy(&p->ui); window_destroy(&p->win); pl_log_destroy(&p->log); memset(p, 0, sizeof(*p)); } static bool open_file(struct plplay *p, const char *filename) { printf("Opening file: '%s'\n", filename); if (avformat_open_input(&p->format, filename, NULL, NULL) != 0) { fprintf(stderr, "libavformat: Failed opening file!\n");
return false; } printf("Format: %s\n", p->format->iformat->name); printf("Duration: %.3f s\n", p->format->duration / 1e6); if (avformat_find_stream_info(p->format, NULL) < 0) { fprintf(stderr, "libavformat: Failed finding stream info!\n"); return false; } // Find "best" video stream int stream_idx = av_find_best_stream(p->format, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); if (stream_idx < 0) { fprintf(stderr, "plplay: File contains no video streams?\n"); return false; } const AVStream *stream = p->format->streams[stream_idx]; const AVCodecParameters *par = stream->codecpar; printf("Found video track (stream %d)\n", stream_idx); printf("Resolution: %d x %d\n", par->width, par->height); printf("FPS: %f\n", av_q2d(stream->avg_frame_rate)); printf("Bitrate: %"PRIi64" kbps\n", par->bit_rate / 1000); printf("Format: %s\n", av_get_pix_fmt_name(par->format)); p->stream = stream; return true; } static inline bool is_file_hdr(struct plplay *p) { assert(p->stream); enum AVColorTransferCharacteristic trc = p->stream->codecpar->color_trc; if (pl_color_transfer_is_hdr(pl_transfer_from_av(trc))) return true; #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 16, 100) if (av_stream_get_side_data(p->stream, AV_PKT_DATA_DOVI_CONF, NULL)) return true; #endif return false; } static bool init_codec(struct plplay *p) { assert(p->stream); assert(p->win->gpu); const AVCodec *codec = avcodec_find_decoder(p->stream->codecpar->codec_id); if (!codec) { fprintf(stderr, "libavcodec: Failed finding matching codec\n"); return false; } p->codec = avcodec_alloc_context3(codec); if (!p->codec) { fprintf(stderr, "libavcodec: Failed allocating codec\n"); return false; } if (avcodec_parameters_to_context(p->codec, p->stream->codecpar) < 0) { fprintf(stderr, "libavcodec: Failed copying codec parameters to codec\n"); return false; } printf("Codec: %s (%s)\n", codec->name, codec->long_name); const AVCodecHWConfig *hwcfg; for (int i = 0; (hwcfg = avcodec_get_hw_config(codec, i)); i++) { if (!pl_test_pixfmt(p->win->gpu, hwcfg->pix_fmt)) continue; if (!(hwcfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) continue; int ret = av_hwdevice_ctx_create(&p->codec->hw_device_ctx, hwcfg->device_type, NULL, NULL, 0); if (ret < 0) { fprintf(stderr, "libavcodec: Failed opening HW device context, skipping\n"); continue; } const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwcfg->pix_fmt); printf("Using hardware frame format: %s\n", desc->name); break; } if (!hwcfg) printf("Using software decoding\n"); p->codec->thread_count = av_cpu_count(); p->codec->get_buffer2 = pl_get_buffer2; p->codec->opaque = &p->win->gpu; #if LIBAVCODEC_VERSION_MAJOR < 60 p->codec->thread_safe_callbacks = 1; #endif #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(58, 113, 100) p->codec->export_side_data |= AV_CODEC_EXPORT_DATA_FILM_GRAIN; #endif if (avcodec_open2(p->codec, codec, NULL) < 0) { fprintf(stderr, "libavcodec: Failed opening codec\n"); return false; } return true; } static bool map_frame(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src, struct pl_frame *out_frame) { AVFrame *frame = src->frame_data; struct plplay *p = frame->opaque; bool ok = pl_map_avframe_ex(gpu, out_frame, pl_avframe_params( .frame = frame, .tex = tex, .map_dovi = !p->ignore_dovi, )); av_frame_free(&frame); // references are preserved by `out_frame` if (!ok) { fprintf(stderr, "Failed mapping AVFrame!\n"); return false; } pl_frame_copy_stream_props(out_frame, p->stream); return true; } static void unmap_frame(pl_gpu gpu, struct pl_frame *frame, const struct pl_source_frame *src) { 
pl_unmap_avframe(gpu, frame); } static void discard_frame(const struct pl_source_frame *src) { AVFrame *frame = src->frame_data; av_frame_free(&frame); printf("Dropped frame with PTS %.3f\n", src->pts); } static void *decode_loop(void *arg) { int ret; struct plplay *p = arg; AVPacket *packet = av_packet_alloc(); AVFrame *frame = av_frame_alloc(); if (!frame || !packet) goto done; double first_pts = 0.0, base_pts = 0.0, last_pts = 0.0; uint64_t num_frames = 0; while (!p->exit_thread) { switch ((ret = av_read_frame(p->format, packet))) { case 0: if (packet->stream_index != p->stream->index) { // Ignore unrelated packets av_packet_unref(packet); continue; } ret = avcodec_send_packet(p->codec, packet); av_packet_unref(packet); break; case AVERROR_EOF: // Send empty input to flush decoder ret = avcodec_send_packet(p->codec, NULL); break; default: fprintf(stderr, "libavformat: Failed reading packet: %s\n", av_err2str(ret)); goto done; } if (ret < 0) { fprintf(stderr, "libavcodec: Failed sending packet to decoder: %s\n", av_err2str(ret)); goto done; } // Decode all frames from this packet while ((ret = avcodec_receive_frame(p->codec, frame)) == 0) { last_pts = frame->pts * av_q2d(p->stream->time_base); if (num_frames++ == 0) first_pts = last_pts; frame->opaque = p; pl_queue_push_block(p->queue, UINT64_MAX, &(struct pl_source_frame) { .pts = last_pts - first_pts + base_pts, .map = map_frame, .unmap = unmap_frame, .discard = discard_frame, .frame_data = frame, }); frame = av_frame_alloc(); } switch (ret) { case AVERROR(EAGAIN): continue; case AVERROR_EOF: if (num_frames <= 1) goto done; // still image or empty file // loop infinitely ret = av_seek_frame(p->format, p->stream->index, 0, AVSEEK_FLAG_BACKWARD); if (ret < 0) { fprintf(stderr, "libavformat: Failed seeking in stream: %s\n", av_err2str(ret)); goto done; } avcodec_flush_buffers(p->codec); base_pts += last_pts; num_frames = 0; continue; default: fprintf(stderr, "libavcodec: Failed decoding frame: %s\n", av_err2str(ret)); goto done; } } done: pl_queue_push(p->queue, NULL); // Signal EOF to flush queue av_packet_free(&packet); av_frame_free(&frame); return NULL; } static void update_settings(struct plplay *p); static void update_colorspace_hint(struct plplay *p, const struct pl_frame_mix *mix) { const struct pl_frame *frame = NULL; for (int i = 0; i < mix->num_frames; i++) { if (mix->timestamps[i] > 0.0) break; frame = mix->frames[i]; } if (!frame) return; struct pl_swapchain_colors hint; pl_swapchain_colors_from_avframe(&hint, frame->user_data); pl_swapchain_colorspace_hint(p->win->swapchain, &hint); } static bool render_frame(struct plplay *p, const struct pl_swapchain_frame *frame, const struct pl_frame_mix *mix) { struct pl_frame target; pl_frame_from_swapchain(&target, frame); update_settings(p); // Update the global settings based on this swapchain frame, then use those pl_color_space_merge(&p->target_color, &target.color); pl_color_repr_merge(&p->target_repr, &target.repr); if (p->target_override) { target.color = p->target_color; target.repr = p->target_repr; target.profile = p->target_icc; } assert(mix->num_frames); const AVFrame *avframe = mix->frames[0]->user_data; double dar = pl_rect2df_aspect(&mix->frames[0]->crop); if (avframe->sample_aspect_ratio.num) dar *= av_q2d(avframe->sample_aspect_ratio); target.rotation = p->target_rot; pl_rect2df_aspect_set_rot(&target.crop, dar, mix->frames[0]->rotation - target.rotation, 0.0); if (!pl_render_image_mix(p->renderer, mix, &target, &p->params)) return false; if (!ui_draw(p->ui, frame)) 
return false; return true; } static bool render_loop(struct plplay *p) { struct pl_queue_params qparams = { .radius = pl_frame_mix_radius(&p->params), .frame_duration = av_q2d(av_inv_q(p->stream->avg_frame_rate)), .interpolation_threshold = 0.01, .timeout = UINT64_MAX, }; // Initialize the frame queue, blocking indefinitely until done struct pl_frame_mix mix; switch (pl_queue_update(p->queue, &mix, &qparams)) { case PL_QUEUE_OK: break; case PL_QUEUE_EOF: return true; case PL_QUEUE_ERR: goto error; default: abort(); } struct pl_swapchain_frame frame; update_colorspace_hint(p, &mix); if (!pl_swapchain_start_frame(p->win->swapchain, &frame)) goto error; // Disable background transparency by default if the swapchain does not // appear to support alpha transaprency if (frame.color_repr.alpha == PL_ALPHA_UNKNOWN) p->params.background_transparency = 0.0; if (!render_frame(p, &frame, &mix)) goto error; if (!pl_swapchain_submit_frame(p->win->swapchain)) goto error; // Wait until rendering is complete. Do this before measuring the time // start, to ensure we don't count initialization overhead as part of the // first vsync. pl_gpu_finish(p->win->gpu); double ts, ts_prev; if (!utils_gettime(&ts_prev)) goto error; pl_swapchain_swap_buffers(p->win->swapchain); window_poll(p->win, false); double pts = 0.0; bool stuck = false; while (!p->win->window_lost) { if (window_get_key(p->win, KEY_ESC)) break; update_colorspace_hint(p, &mix); if (!pl_swapchain_start_frame(p->win->swapchain, &frame)) { // Window stuck/invisible? Block for events and try again. window_poll(p->win, true); continue; } retry: if (!utils_gettime(&ts)) goto error; if (!stuck) { pts += (ts - ts_prev); } ts_prev = ts; qparams.timeout = 50000000; // 50 ms qparams.pts = pts; switch (pl_queue_update(p->queue, &mix, &qparams)) { case PL_QUEUE_ERR: goto error; case PL_QUEUE_EOF: return true; case PL_QUEUE_OK: if (!render_frame(p, &frame, &mix)) goto error; stuck = false; break; case PL_QUEUE_MORE: stuck = true; goto retry; } if (!pl_swapchain_submit_frame(p->win->swapchain)) { fprintf(stderr, "libplacebo: failed presenting frame!\n"); goto error; } pl_swapchain_swap_buffers(p->win->swapchain); window_poll(p->win, false); } return true; error: fprintf(stderr, "Render loop failed, exiting early...\n"); return false; } static void info_callback(void *priv, const struct pl_render_info *info) { struct plplay *p = priv; struct pass_info *pass = NULL; switch (info->stage) { case PL_RENDER_STAGE_FRAME: if (info->index >= MAX_FRAME_PASSES) return; p->num_frame_passes = info->index + 1; pass = &p->frame_info[info->index]; break; case PL_RENDER_STAGE_BLEND: if (info->index >= MAX_BLEND_FRAMES) return; pass = &p->blend_info[info->index]; break; case PL_RENDER_STAGE_COUNT: abort(); } free(pass->name); pass->name = strdup(info->pass->shader->description); pass->pass = *info->pass; } static struct plplay state; int main(int argc, char **argv) { const char *filename; enum pl_log_level log_level = PL_LOG_INFO; if (argc == 3 && strcmp(argv[1], "-v") == 0) { filename = argv[2]; log_level = PL_LOG_DEBUG; av_log_set_level(AV_LOG_VERBOSE); } else if (argc == 2) { filename = argv[1]; av_log_set_level(AV_LOG_INFO); } else { fprintf(stderr, "Usage: ./%s [-v] \n", argv[0]); return -1; } state = (struct plplay) { .params = pl_render_default_params, .deband_params = pl_deband_default_params, .sigmoid_params = pl_sigmoid_default_params, .color_adjustment = pl_color_adjustment_neutral, .peak_detect_params = pl_peak_detect_default_params, .color_map_params = 
pl_color_map_default_params, .dither_params = pl_dither_default_params, .cone_params = pl_vision_normal, .target_override = true, }; // Redirect all of the pointers in `params.default` to instead point to the // structs inside `struct plplay`, so we can adjust them using the UI #define DEFAULT_PARAMS(field) \ state.params.field = state.params.field ? &state.field : NULL DEFAULT_PARAMS(deband_params); DEFAULT_PARAMS(sigmoid_params); DEFAULT_PARAMS(peak_detect_params); DEFAULT_PARAMS(dither_params); state.params.color_adjustment = &state.color_adjustment; state.params.color_map_params = &state.color_map_params; state.params.cone_params = &state.cone_params; // Enable dynamic parameters by default, due to plplay's heavy reliance on // GUI controls for dynamically adjusting render parameters. state.params.dynamic_constants = true; // Hook up our pass info callback state.params.info_callback = info_callback; state.params.info_priv = &state; struct plplay *p = &state; if (!open_file(p, filename)) goto error; const AVCodecParameters *par = p->stream->codecpar; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(par->format); if (!desc) goto error; struct window_params params = { .title = "plplay", .width = par->width, .height = par->height, .colors = { .primaries = pl_primaries_from_av(par->color_primaries), .transfer = pl_transfer_from_av(par->color_trc), // HDR metadata will come from AVFrame side data }, }; if (desc->flags & AV_PIX_FMT_FLAG_ALPHA) { params.alpha = true; state.params.background_transparency = 1.0; } p->log = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_color, .log_level = log_level, )); p->win = window_create(p->log, ¶ms); if (!p->win) goto error; // Test the AVPixelFormat against the GPU capabilities if (!pl_test_pixfmt(p->win->gpu, par->format)) { fprintf(stderr, "Unsupported AVPixelFormat: %s\n", desc->name); goto error; } #ifdef HAVE_NUKLEAR p->ui = ui_create(p->win->gpu); if (!p->ui) goto error; // Find the right named filter entries for the defaults const struct pl_filter_preset *f; for (f = pl_scale_filters; f->name; f++) { if (p->params.upscaler == f->filter) p->upscaler = f; if (p->params.downscaler == f->filter) p->downscaler = f; } for (f = pl_frame_mixers; f->name; f++) { if (p->params.frame_mixer == f->filter) p->frame_mixer = f; } assert(p->upscaler && p->downscaler && p->frame_mixer); #endif if (!init_codec(p)) goto error; p->queue = pl_queue_create(p->win->gpu); int ret = pthread_create(&p->decoder_thread, NULL, decode_loop, p); if (ret != 0) { fprintf(stderr, "Failed creating decode thread: %s\n", strerror(errno)); goto error; } p->decoder_thread_created = true; p->renderer = pl_renderer_create(p->log, p->win->gpu); if (!render_loop(p)) goto error; printf("Exiting...\n"); uninit(p); return 0; error: uninit(p); return 1; } #ifdef HAVE_NUKLEAR static void add_hook(struct plplay *p, const struct pl_hook *hook, const char *path) { if (!hook) return; if (p->shader_num == p->shader_size) { // Grow array if needed size_t new_size = p->shader_size ? 
p->shader_size * 2 : 16; void *new_hooks = realloc(p->shader_hooks, new_size * sizeof(void *)); if (!new_hooks) goto error; p->shader_hooks = new_hooks; char **new_paths = realloc(p->shader_paths, new_size * sizeof(char *)); if (!new_paths) goto error; p->shader_paths = new_paths; p->shader_size = new_size; } // strip leading path while (true) { const char *fname = strchr(path, '/'); if (!fname) break; path = fname + 1; } char *path_copy = strdup(path); if (!path_copy) goto error; p->shader_hooks[p->shader_num] = hook; p->shader_paths[p->shader_num] = path_copy; p->shader_num++; return; error: pl_mpv_user_shader_destroy(&hook); } static void update_settings(struct plplay *p) { struct nk_context *nk = ui_get_context(p->ui); enum nk_panel_flags win_flags = NK_WINDOW_BORDER | NK_WINDOW_MOVABLE | NK_WINDOW_SCALABLE | NK_WINDOW_MINIMIZABLE | NK_WINDOW_TITLE; ui_update_input(p->ui, p->win); const char *dropped_file = window_get_file(p->win); struct pl_render_params *par = &p->params; if (nk_begin(nk, "Settings", nk_rect(100, 100, 600, 600), win_flags)) { if (nk_tree_push(nk, NK_TREE_NODE, "Window settings", NK_MAXIMIZED)) { struct nk_colorf bg = { par->background_color[0], par->background_color[1], par->background_color[2], 1.0 - par->background_transparency, }; nk_layout_row_dynamic(nk, 24, 2); nk_label(nk, "Background color:", NK_TEXT_LEFT); if (nk_combo_begin_color(nk, nk_rgb_cf(bg), nk_vec2(nk_widget_width(nk), 300))) { nk_layout_row_dynamic(nk, 200, 1); nk_color_pick(nk, &bg, NK_RGBA); nk_combo_end(nk); par->background_color[0] = bg.r; par->background_color[1] = bg.g; par->background_color[2] = bg.b; par->background_transparency = 1.0 - bg.a; } nk_layout_row_dynamic(nk, 24, 2); par->blend_against_tiles = nk_check_label(nk, "Blend against tiles", par->blend_against_tiles); nk_property_int(nk, "Tile size", 2, &par->tile_size, 256, 1, 1); nk_layout_row(nk, NK_DYNAMIC, 24, 3, (float[]){ 0.4, 0.3, 0.3 }); nk_label(nk, "Tile colors:", NK_TEXT_LEFT); for (int i = 0; i < 2; i++) { bg = (struct nk_colorf) { par->tile_colors[i][0], par->tile_colors[i][1], par->tile_colors[i][2], }; if (nk_combo_begin_color(nk, nk_rgb_cf(bg), nk_vec2(nk_widget_width(nk), 300))) { nk_layout_row_dynamic(nk, 200, 1); nk_color_pick(nk, &bg, NK_RGB); nk_combo_end(nk); par->tile_colors[i][0] = bg.r; par->tile_colors[i][1] = bg.g; par->tile_colors[i][2] = bg.b; } } static const char *rotations[4] = { [PL_ROTATION_0] = "0°", [PL_ROTATION_90] = "90°", [PL_ROTATION_180] = "180°", [PL_ROTATION_270] = "270°", }; nk_layout_row_dynamic(nk, 24, 2); nk_label(nk, "Display orientation:", NK_TEXT_LEFT); p->target_rot = nk_combo(nk, rotations, 4, p->target_rot, 16, nk_vec2(nk_widget_width(nk), 100)); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Image scaling", NK_MAXIMIZED)) { const struct pl_filter_preset *f; nk_layout_row(nk, NK_DYNAMIC, 24, 2, (float[]){ 0.3, 0.7 }); nk_label(nk, "Upscaler:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, p->upscaler->description, nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 16, 1); for (f = pl_scale_filters; f->name; f++) { if (!f->description) continue; if (nk_combo_item_label(nk, f->description, NK_TEXT_LEFT)) p->upscaler = f; } par->upscaler = p->upscaler->filter; nk_combo_end(nk); } nk_label(nk, "Downscaler:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, p->downscaler->description, nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 16, 1); for (f = pl_scale_filters; f->name; f++) { if (!f->description) continue; if (nk_combo_item_label(nk, f->description, 
NK_TEXT_LEFT)) p->downscaler = f; } par->downscaler = p->downscaler->filter; nk_combo_end(nk); } nk_label(nk, "Frame mixing:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, p->frame_mixer->description, nk_vec2(nk_widget_width(nk), 300))) { nk_layout_row_dynamic(nk, 16, 1); for (f = pl_frame_mixers; f->name; f++) { if (!f->description) continue; if (nk_combo_item_label(nk, f->description, NK_TEXT_LEFT)) p->frame_mixer = f; } par->frame_mixer = p->frame_mixer->filter; nk_combo_end(nk); } nk_layout_row_dynamic(nk, 24, 2); par->skip_anti_aliasing = !nk_check_label(nk, "Anti-aliasing", !par->skip_anti_aliasing); nk_property_float(nk, "Antiringing", 0, &par->antiringing_strength, 1.0, 0.1, 0.01); nk_property_int(nk, "LUT precision", 0, &par->lut_entries, 256, 1, 1); float cutoff = par->polar_cutoff * 100.0; nk_property_float(nk, "Polar cutoff (%)", 0.0, &cutoff, 100.0, 0.1, 0.01); par->polar_cutoff = cutoff / 100.0; struct pl_sigmoid_params *spar = &p->sigmoid_params; nk_layout_row_dynamic(nk, 24, 2); par->sigmoid_params = nk_check_label(nk, "Sigmoidization", par->sigmoid_params) ? spar : NULL; if (nk_button_label(nk, "Default values")) *spar = pl_sigmoid_default_params; nk_property_float(nk, "Sigmoid center", 0, &spar->center, 1, 0.1, 0.01); nk_property_float(nk, "Sigmoid slope", 0, &spar->slope, 100, 1, 0.1); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Debanding", NK_MINIMIZED)) { struct pl_deband_params *dpar = &p->deband_params; nk_layout_row_dynamic(nk, 24, 2); par->deband_params = nk_check_label(nk, "Enable", par->deband_params) ? dpar : NULL; if (nk_button_label(nk, "Reset settings")) *dpar = pl_deband_default_params; nk_property_int(nk, "Iterations", 0, &dpar->iterations, 8, 1, 0); nk_property_float(nk, "Threshold", 0, &dpar->threshold, 256, 1, 0.5); nk_property_float(nk, "Radius", 0, &dpar->radius, 256, 1, 0.2); nk_property_float(nk, "Grain", 0, &dpar->grain, 512, 1, 0.5); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Color adjustment", NK_MINIMIZED)) { struct pl_color_adjustment *adj = &p->color_adjustment; nk_layout_row_dynamic(nk, 24, 2); par->color_adjustment = nk_check_label(nk, "Enable", par->color_adjustment) ? adj : NULL; if (nk_button_label(nk, "Default values")) *adj = pl_color_adjustment_neutral; nk_property_float(nk, "Brightness", -1, &adj->brightness, 1, 0.1, 0.005); nk_property_float(nk, "Contrast", 0, &adj->contrast, 10, 0.1, 0.005); // Convert to (cyclical) degrees for display int deg = roundf(adj->hue * 180.0 / M_PI); nk_property_int(nk, "Hue (°)", -50, °, 400, 1, 1); adj->hue = ((deg + 360) % 360) * M_PI / 180.0; nk_property_float(nk, "Saturation", 0, &adj->saturation, 10, 0.1, 0.005); nk_property_float(nk, "Gamma", 0, &adj->gamma, 10, 0.1, 0.005); // Convert to human-friendly temperature values for display int temp = (int) roundf(adj->temperature * 3500) + 6500; nk_property_int(nk, "Temperature (K)", 3000, &temp, 10000, 10, 5); adj->temperature = (temp - 6500) / 3500.0; struct pl_cone_params *cpar = &p->cone_params; nk_layout_row_dynamic(nk, 24, 2); par->cone_params = nk_check_label(nk, "Color blindness", par->cone_params) ? 
cpar : NULL; if (nk_button_label(nk, "Default values")) *cpar = pl_vision_normal; nk_layout_row(nk, NK_DYNAMIC, 24, 5, (float[]){ 0.25, 0.25/3, 0.25/3, 0.25/3, 0.5 }); nk_label(nk, "Cone model:", NK_TEXT_LEFT); unsigned int cones = cpar->cones; nk_checkbox_flags_label(nk, "L", &cones, PL_CONE_L); nk_checkbox_flags_label(nk, "M", &cones, PL_CONE_M); nk_checkbox_flags_label(nk, "S", &cones, PL_CONE_S); cpar->cones = cones; nk_property_float(nk, "Sensitivity", 0.0, &cpar->strength, 5.0, 0.1, 0.01); nk_tree_pop(nk); } if (is_file_hdr(p)) { if (nk_tree_push(nk, NK_TREE_NODE, "HDR peak detection", NK_MINIMIZED)) { struct pl_peak_detect_params *ppar = &p->peak_detect_params; nk_layout_row_dynamic(nk, 24, 2); par->peak_detect_params = nk_check_label(nk, "Enable", par->peak_detect_params) ? ppar : NULL; if (nk_button_label(nk, "Reset settings")) *ppar = pl_peak_detect_default_params; nk_property_float(nk, "Threshold low", 0.0, &ppar->scene_threshold_low, 20.0, 0.5, 0.005); nk_property_float(nk, "Threshold high", 0.0, &ppar->scene_threshold_high, 20.0, 0.5, 0.005); nk_property_float(nk, "Smoothing period", 1.0, &ppar->smoothing_period, 1000.0, 5.0, 1.0); nk_property_float(nk, "Minimum peak", 0.0, &ppar->minimum_peak, 10.0, 0.1, 0.01); int overshoot = roundf(ppar->overshoot_margin * 100.0); nk_property_int(nk, "Overshoot (%)", 0, &overshoot, 200, 1, 1); ppar->overshoot_margin = overshoot / 100.0; nk_tree_pop(nk); } } if (nk_tree_push(nk, NK_TREE_NODE, "Tone mapping", NK_MINIMIZED)) { struct pl_color_map_params *cpar = &p->color_map_params; static const struct pl_color_map_params null_settings = {0}; nk_layout_row_dynamic(nk, 24, 2); par->color_map_params = nk_check_label(nk, "Enable", par->color_map_params == cpar) ? cpar : &null_settings; if (nk_button_label(nk, "Reset settings")) *cpar = pl_color_map_default_params; static const char *rendering_intents[4] = { [PL_INTENT_PERCEPTUAL] = "Perceptual", [PL_INTENT_RELATIVE_COLORIMETRIC] = "Relative colorimetric", [PL_INTENT_SATURATION] = "Saturation", [PL_INTENT_ABSOLUTE_COLORIMETRIC] = "Absolute colorimetric", }; nk_label(nk, "Rendering intent:", NK_TEXT_LEFT); cpar->intent = nk_combo(nk, rendering_intents, 4, cpar->intent, 16, nk_vec2(nk_widget_width(nk), 100)); static const char *gamut_modes[PL_GAMUT_MODE_COUNT] = { [PL_GAMUT_CLIP] = "Hard-clip", [PL_GAMUT_WARN] = "Highlight", [PL_GAMUT_DARKEN] = "Darken", [PL_GAMUT_DESATURATE] = "Desaturate", }; nk_label(nk, "Out-of-gamut handling:", NK_TEXT_LEFT); cpar->gamut_mode = nk_combo(nk, gamut_modes, PL_GAMUT_MODE_COUNT, cpar->gamut_mode, 16, nk_vec2(nk_widget_width(nk), 300)); nk_label(nk, "Tone mapping function:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, cpar->tone_mapping_function->description, nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 16, 1); for (int i = 0; i < pl_num_tone_map_functions; i++) { const struct pl_tone_map_function *f = pl_tone_map_functions[i]; if (nk_combo_item_label(nk, f->description, NK_TEXT_LEFT)) { if (f != cpar->tone_mapping_function) cpar->tone_mapping_param = f->param_def; cpar->tone_mapping_function = f; } } nk_combo_end(nk); } static const char *tone_mapping_modes[PL_TONE_MAP_MODE_COUNT] = { [PL_TONE_MAP_AUTO] = "Automatic selection", [PL_TONE_MAP_RGB] = "Per-channel (RGB)", [PL_TONE_MAP_MAX] = "Maximum component", [PL_TONE_MAP_HYBRID] = "Hybrid luminance", [PL_TONE_MAP_LUMA] = "Luminance (BT.2446 A)", }; nk_label(nk, "Tone mapping mode:", NK_TEXT_LEFT); cpar->tone_mapping_mode = nk_combo(nk, tone_mapping_modes, PL_TONE_MAP_MODE_COUNT, 
cpar->tone_mapping_mode, 16, nk_vec2(nk_widget_width(nk), 300)); nk_label(nk, "Algorithm parameter:", NK_TEXT_LEFT); const struct pl_tone_map_function *fun = cpar->tone_mapping_function; if (fun->param_desc) { nk_property_float(nk, fun->param_desc, fmaxf(fun->param_min, 0.001), &cpar->tone_mapping_param, fun->param_max, 0.01, 0.001); } else { nk_label(nk, "(N/A)", NK_TEXT_LEFT); } nk_property_int(nk, "LUT size", 16, &cpar->lut_size, 1024, 1, 1); nk_property_float(nk, "Crosstalk", 0.0, &cpar->tone_mapping_crosstalk, 0.30, 0.01, 0.001); nk_checkbox_label(nk, "Inverse tone mapping", &cpar->inverse_tone_mapping); nk_checkbox_label(nk, "Force full LUT", &cpar->force_tone_mapping_lut); nk_layout_row_dynamic(nk, 50, 1); if (ui_widget_hover(nk, "Drop .cube file here...") && dropped_file) { uint8_t *buf; size_t size; int ret = av_file_map(dropped_file, &buf, &size, 0, NULL); if (ret < 0) { fprintf(stderr, "Failed opening '%s': %s\n", dropped_file, av_err2str(ret)); } else { pl_lut_free((struct pl_custom_lut **) &par->lut); par->lut = pl_lut_parse_cube(p->log, (char *) buf, size); av_file_unmap(buf, size); } } static const char *lut_types[] = { [PL_LUT_UNKNOWN] = "Auto (unknown)", [PL_LUT_NATIVE] = "Raw RGB (native)", [PL_LUT_NORMALIZED] = "Linear RGB (normalized)", [PL_LUT_CONVERSION] = "Gamut conversion (native)", }; nk_layout_row(nk, NK_DYNAMIC, 24, 3, (float[]){ 0.2, 0.3, 0.5 }); if (nk_button_label(nk, "Reset LUT")) { pl_lut_free((struct pl_custom_lut **) &par->lut); par->lut_type = PL_LUT_UNKNOWN; } nk_label(nk, "LUT type:", NK_TEXT_CENTERED); par->lut_type = nk_combo(nk, lut_types, 4, par->lut_type, 16, nk_vec2(nk_widget_width(nk), 100)); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Dithering", NK_MINIMIZED)) { struct pl_dither_params *dpar = &p->dither_params; nk_layout_row_dynamic(nk, 24, 2); par->dither_params = nk_check_label(nk, "Enable", par->dither_params) ? dpar : NULL; if (nk_button_label(nk, "Reset settings")) *dpar = pl_dither_default_params; static const char *dither_methods[PL_DITHER_METHOD_COUNT] = { [PL_DITHER_BLUE_NOISE] = "Blue noise", [PL_DITHER_ORDERED_LUT] = "Ordered (LUT)", [PL_DITHER_ORDERED_FIXED] = "Ordered (fixed size)", [PL_DITHER_WHITE_NOISE] = "White noise", }; nk_label(nk, "Dither method:", NK_TEXT_LEFT); dpar->method = nk_combo(nk, dither_methods, PL_DITHER_METHOD_COUNT, dpar->method, 16, nk_vec2(nk_widget_width(nk), 100)); static const char *lut_sizes[8] = { "2x2", "4x4", "8x8", "16x16", "32x32", "64x64", "128x128", "256x256", }; nk_label(nk, "LUT size:", NK_TEXT_LEFT); switch (dpar->method) { case PL_DITHER_BLUE_NOISE: case PL_DITHER_ORDERED_LUT: { int size = dpar->lut_size - 1; nk_combobox(nk, lut_sizes, 8, &size, 16, nk_vec2(nk_widget_width(nk), 200)); dpar->lut_size = size + 1; break; } case PL_DITHER_ORDERED_FIXED: nk_label(nk, "64x64", NK_TEXT_LEFT); break; default: nk_label(nk, "(N/A)", NK_TEXT_LEFT); break; } nk_checkbox_label(nk, "Temporal dithering", &dpar->temporal); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Output color space", NK_MINIMIZED)) { struct pl_color_space *tcol = &p->target_color; struct pl_color_repr *trepr = &p->target_repr; nk_layout_row_dynamic(nk, 24, 2); nk_checkbox_label(nk, "Enable", &p->target_override); bool reset = nk_button_label(nk, "Reset settings"); bool reset_icc = reset; nk_layout_row(nk, NK_DYNAMIC, 24, 2, (float[]){ 0.3, 0.7 }); static const char *primaries[PL_COLOR_PRIM_COUNT] = { [PL_COLOR_PRIM_UNKNOWN] = "Auto (unknown)", [PL_COLOR_PRIM_BT_601_525] = "ITU-R Rec. 
BT.601 (525-line = NTSC, SMPTE-C)", [PL_COLOR_PRIM_BT_601_625] = "ITU-R Rec. BT.601 (625-line = PAL, SECAM)", [PL_COLOR_PRIM_BT_709] = "ITU-R Rec. BT.709 (HD), also sRGB", [PL_COLOR_PRIM_BT_470M] = "ITU-R Rec. BT.470 M", [PL_COLOR_PRIM_EBU_3213] = "EBU Tech. 3213-E / JEDEC P22 phosphors", [PL_COLOR_PRIM_BT_2020] = "ITU-R Rec. BT.2020 (UltraHD)", [PL_COLOR_PRIM_APPLE] = "Apple RGB", [PL_COLOR_PRIM_ADOBE] = "Adobe RGB (1998)", [PL_COLOR_PRIM_PRO_PHOTO] = "ProPhoto RGB (ROMM)", [PL_COLOR_PRIM_CIE_1931] = "CIE 1931 RGB primaries", [PL_COLOR_PRIM_DCI_P3] = "DCI-P3 (Digital Cinema)", [PL_COLOR_PRIM_DISPLAY_P3] = "DCI-P3 (Digital Cinema) with D65 white point", [PL_COLOR_PRIM_V_GAMUT] = "Panasonic V-Gamut (VARICAM)", [PL_COLOR_PRIM_S_GAMUT] = "Sony S-Gamut", [PL_COLOR_PRIM_FILM_C] = "Traditional film primaries with Illuminant C", }; nk_label(nk, "Primaries:", NK_TEXT_LEFT); tcol->primaries = nk_combo(nk, primaries, PL_COLOR_PRIM_COUNT, tcol->primaries, 16, nk_vec2(nk_widget_width(nk), 200)); static const char *transfers[PL_COLOR_TRC_COUNT] = { [PL_COLOR_TRC_UNKNOWN] = "Auto (unknown)", [PL_COLOR_TRC_BT_1886] = "ITU-R Rec. BT.1886 (CRT emulation + OOTF)", [PL_COLOR_TRC_SRGB] = "IEC 61966-2-4 sRGB (CRT emulation)", [PL_COLOR_TRC_LINEAR] = "Linear light content", [PL_COLOR_TRC_GAMMA18] = "Pure power gamma 1.8", [PL_COLOR_TRC_GAMMA20] = "Pure power gamma 2.0", [PL_COLOR_TRC_GAMMA22] = "Pure power gamma 2.2", [PL_COLOR_TRC_GAMMA24] = "Pure power gamma 2.4", [PL_COLOR_TRC_GAMMA26] = "Pure power gamma 2.6", [PL_COLOR_TRC_GAMMA28] = "Pure power gamma 2.8", [PL_COLOR_TRC_PRO_PHOTO] = "ProPhoto RGB (ROMM)", [PL_COLOR_TRC_PQ] = "ITU-R BT.2100 PQ (perceptual quantizer), aka SMPTE ST2048", [PL_COLOR_TRC_HLG] = "ITU-R BT.2100 HLG (hybrid log-gamma), aka ARIB STD-B67", [PL_COLOR_TRC_V_LOG] = "Panasonic V-Log (VARICAM)", [PL_COLOR_TRC_S_LOG1] = "Sony S-Log1", [PL_COLOR_TRC_S_LOG2] = "Sony S-Log2", }; nk_label(nk, "Transfer:", NK_TEXT_LEFT); tcol->transfer = nk_combo(nk, transfers, PL_COLOR_TRC_COUNT, tcol->transfer, 16, nk_vec2(nk_widget_width(nk), 200)); nk_layout_row_dynamic(nk, 24, 2); nk_checkbox_label(nk, "Override HDR levels", &p->levels_override); bool reset_levels = nk_button_label(nk, "Reset levels"); if (p->levels_override) { // Ensure these values are always legal by going through // `pl_color_space_infer`, without clobbering the rest nk_layout_row_dynamic(nk, 24, 2); struct pl_color_space fix = *tcol; pl_color_space_infer(&fix); fix.hdr.min_luma *= 1000; // better value range nk_property_float(nk, "White point (cd/m²)", 1e-2, &fix.hdr.max_luma, 10000.0, fix.hdr.max_luma / 100, fix.hdr.max_luma / 1000); nk_property_float(nk, "Black point (mcd/m²)", 1e-3, &fix.hdr.min_luma, 10000.0, fix.hdr.min_luma / 100, fix.hdr.min_luma / 1000); fix.hdr.min_luma /= 1000; pl_color_space_infer(&fix); tcol->hdr = fix.hdr; } else { reset_levels = true; } nk_layout_row(nk, NK_DYNAMIC, 24, 2, (float[]){ 0.3, 0.7 }); static const char *systems[PL_COLOR_SYSTEM_COUNT] = { [PL_COLOR_SYSTEM_UNKNOWN] = "Auto (unknown)", [PL_COLOR_SYSTEM_BT_601] = "ITU-R Rec. BT.601 (SD)", [PL_COLOR_SYSTEM_BT_709] = "ITU-R Rec. BT.709 (HD)", [PL_COLOR_SYSTEM_SMPTE_240M] = "SMPTE-240M", [PL_COLOR_SYSTEM_BT_2020_NC] = "ITU-R Rec. BT.2020 (non-constant luminance)", [PL_COLOR_SYSTEM_BT_2020_C] = "ITU-R Rec. BT.2020 (constant luminance)", [PL_COLOR_SYSTEM_BT_2100_PQ] = "ITU-R Rec. BT.2100 ICtCp PQ variant", [PL_COLOR_SYSTEM_BT_2100_HLG] = "ITU-R Rec. 
BT.2100 ICtCp HLG variant", [PL_COLOR_SYSTEM_DOLBYVISION] = "Dolby Vision (invalid for output)", [PL_COLOR_SYSTEM_YCGCO] = "YCgCo (derived from RGB)", [PL_COLOR_SYSTEM_RGB] = "Red, Green and Blue", [PL_COLOR_SYSTEM_XYZ] = "CIE 1931 XYZ, pre-encoded with gamma 2.6", }; nk_label(nk, "System:", NK_TEXT_LEFT); trepr->sys = nk_combo(nk, systems, PL_COLOR_SYSTEM_COUNT, trepr->sys, 16, nk_vec2(nk_widget_width(nk), 200)); if (trepr->sys == PL_COLOR_SYSTEM_DOLBYVISION) trepr->sys =PL_COLOR_SYSTEM_UNKNOWN; static const char *levels[PL_COLOR_LEVELS_COUNT] = { [PL_COLOR_LEVELS_UNKNOWN] = "Auto (unknown)", [PL_COLOR_LEVELS_LIMITED] = "Limited/TV range, e.g. 16-235", [PL_COLOR_LEVELS_FULL] = "Full/PC range, e.g. 0-255", }; nk_label(nk, "Levels:", NK_TEXT_LEFT); trepr->levels = nk_combo(nk, levels, PL_COLOR_LEVELS_COUNT, trepr->levels, 16, nk_vec2(nk_widget_width(nk), 200)); static const char *alphas[PL_ALPHA_MODE_COUNT] = { [PL_ALPHA_UNKNOWN] = "Auto (unknown, or no alpha)", [PL_ALPHA_INDEPENDENT] = "Independent alpha channel", [PL_ALPHA_PREMULTIPLIED] = "Premultiplied alpha channel", }; nk_label(nk, "Alpha:", NK_TEXT_LEFT); trepr->alpha = nk_combo(nk, alphas, PL_ALPHA_MODE_COUNT, trepr->alpha, 16, nk_vec2(nk_widget_width(nk), 200)); // Adjust these two fields in unison int bits = trepr->bits.color_depth; nk_label(nk, "Bit depth:", NK_TEXT_LEFT); nk_property_int(nk, "", 0, &bits, 16, 1, 0); trepr->bits.color_depth = bits; trepr->bits.sample_depth = bits; nk_layout_row_dynamic(nk, 50, 1); if (ui_widget_hover(nk, "Drop ICC profile here...") && dropped_file) { uint8_t *buf; size_t size; int ret = av_file_map(dropped_file, &buf, &size, 0, NULL); if (ret < 0) { fprintf(stderr, "Failed opening '%s': %s\n", dropped_file, av_err2str(ret)); } else { av_file_unmap((void *) p->target_icc.data, p->target_icc.len); p->target_icc.data = buf; p->target_icc.len = size; p->target_icc.signature++; free(p->target_icc_name); p->target_icc_name = strdup(basename((char *) dropped_file)); } } if (p->target_icc.len) { nk_layout_row(nk, NK_DYNAMIC, 24, 2, (float[]){ 0.7, 0.3 }); nk_labelf(nk, NK_TEXT_LEFT, "Loaded: %s", p->target_icc_name ? 
p->target_icc_name : "(unknown)"); reset_icc |= nk_button_label(nk, "Reset ICC"); } // Apply the reset last to prevent the UI from flashing for a frame if (reset) { *tcol = (struct pl_color_space) {0}; *trepr = (struct pl_color_repr) {0}; } if (reset_icc && p->target_icc.len) { av_file_unmap((void *) p->target_icc.data, p->target_icc.len); free(p->target_icc_name); p->target_icc_name = NULL; p->target_icc = (struct pl_icc_profile) { .signature = p->target_icc.signature + 1, }; } if (reset_levels) tcol->hdr = (struct pl_hdr_metadata) {0}; nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Custom shaders", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 50, 1); if (ui_widget_hover(nk, "Drop .hook/.glsl files here...") && dropped_file) { uint8_t *buf; size_t size; int ret = av_file_map(dropped_file, &buf, &size, 0, NULL); if (ret < 0) { fprintf(stderr, "Failed opening '%s': %s\n", dropped_file, av_err2str(ret)); } else { const struct pl_hook *hook; hook = pl_mpv_user_shader_parse(p->win->gpu, (char *) buf, size); av_file_unmap(buf, size); add_hook(p, hook, dropped_file); } } const float px = 24.0; nk_layout_row_template_begin(nk, px); nk_layout_row_template_push_static(nk, px); nk_layout_row_template_push_static(nk, px); nk_layout_row_template_push_static(nk, px); nk_layout_row_template_push_dynamic(nk); nk_layout_row_template_end(nk); for (int i = 0; i < p->shader_num; i++) { if (i == 0) { nk_label(nk, "·", NK_TEXT_CENTERED); } else if (nk_button_symbol(nk, NK_SYMBOL_TRIANGLE_UP)) { const struct pl_hook *prev_hook = p->shader_hooks[i - 1]; char *prev_path = p->shader_paths[i - 1]; p->shader_hooks[i - 1] = p->shader_hooks[i]; p->shader_paths[i - 1] = p->shader_paths[i]; p->shader_hooks[i] = prev_hook; p->shader_paths[i] = prev_path; } if (i == p->shader_num - 1) { nk_label(nk, "·", NK_TEXT_CENTERED); } else if (nk_button_symbol(nk, NK_SYMBOL_TRIANGLE_DOWN)) { const struct pl_hook *next_hook = p->shader_hooks[i + 1]; char *next_path = p->shader_paths[i + 1]; p->shader_hooks[i + 1] = p->shader_hooks[i]; p->shader_paths[i + 1] = p->shader_paths[i]; p->shader_hooks[i] = next_hook; p->shader_paths[i] = next_path; } if (nk_button_symbol(nk, NK_SYMBOL_X)) { pl_mpv_user_shader_destroy(&p->shader_hooks[i]); free(p->shader_paths[i]); p->shader_num--; memmove(&p->shader_hooks[i], &p->shader_hooks[i+1], (p->shader_num - i) * sizeof(void *)); memmove(&p->shader_paths[i], &p->shader_paths[i+1], (p->shader_num - i) * sizeof(char *)); } if (i < p->shader_num) nk_label(nk, p->shader_paths[i], NK_TEXT_LEFT); } par->hooks = p->shader_hooks; par->num_hooks = p->shader_num; nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Debug", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 24, 1); nk_checkbox_label(nk, "Allow delayed peak-detect", &par->allow_delayed_peak_detect); nk_checkbox_label(nk, "Preserve mixing cache", &par->preserve_mixing_cache); nk_checkbox_label(nk, "Disable linear scaling", &par->disable_linear_scaling); nk_checkbox_label(nk, "Disable built-in scalers", &par->disable_builtin_scalers); nk_checkbox_label(nk, "Force-enable 3DLUT", &par->force_icc_lut); nk_checkbox_label(nk, "Force-enable dither", &par->force_dither); nk_checkbox_label(nk, "Disable FBOs / advanced rendering", &par->disable_fbos); nk_checkbox_label(nk, "Disable constant hard-coding", &par->dynamic_constants); nk_checkbox_label(nk, "Ignore ICC profiles", &par->ignore_icc_profiles); if (nk_check_label(nk, "Ignore Dolby Vision metadata", p->ignore_dovi) != p->ignore_dovi) { // Flush the renderer cache on changes, since this can // 
drastically alter the subjective appearance of the stream pl_renderer_flush_cache(p->renderer); p->ignore_dovi = !p->ignore_dovi; } nk_layout_row_dynamic(nk, 24, 2); if (nk_button_label(nk, "Flush renderer cache")) pl_renderer_flush_cache(p->renderer); if (nk_button_label(nk, "Recreate renderer")) { pl_renderer_destroy(&p->renderer); p->renderer = pl_renderer_create(p->log, p->win->gpu); } if (nk_tree_push(nk, NK_TREE_NODE, "Shader passes", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 26, 1); nk_label(nk, "Full frames:", NK_TEXT_LEFT); for (int i = 0; i < p->num_frame_passes; i++) { struct pass_info *info = &p->frame_info[i]; nk_layout_row_dynamic(nk, 24, 1); nk_labelf(nk, NK_TEXT_LEFT, "- %s: %.3f / %.3f / %.3f ms", info->name, info->pass.last / 1e6, info->pass.average / 1e6, info->pass.peak / 1e6); nk_layout_row_dynamic(nk, 32, 1); if (nk_chart_begin(nk, NK_CHART_LINES, info->pass.num_samples, 0.0f, info->pass.peak)) { for (int k = 0; k < info->pass.num_samples; k++) nk_chart_push(nk, info->pass.samples[k]); nk_chart_end(nk); } } nk_layout_row_dynamic(nk, 26, 1); nk_label(nk, "Output blending:", NK_TEXT_LEFT); for (int i = 0; i < MAX_BLEND_FRAMES; i++) { struct pass_info *info = &p->blend_info[i]; if (!info->name) continue; nk_layout_row_dynamic(nk, 24, 1); nk_labelf(nk, NK_TEXT_LEFT, "- (%d frame%s) %s: %.3f / %.3f / %.3f ms", i, i > 1 ? "s" : "", info->name, info->pass.last / 1e6, info->pass.average / 1e6, info->pass.peak / 1e6); nk_layout_row_dynamic(nk, 32, 1); if (nk_chart_begin(nk, NK_CHART_LINES, info->pass.num_samples, 0.0f, info->pass.peak)) { for (int k = 0; k < info->pass.num_samples; k++) nk_chart_push(nk, info->pass.samples[k]); nk_chart_end(nk); } } nk_tree_pop(nk); } nk_tree_pop(nk); } } nk_end(nk); } #else static void update_settings(struct plplay *p) { } #endif // HAVE_NUKLEAR libplacebo-v4.192.1/demos/sdlimage.c000066400000000000000000000165401417677245700172440ustar00rootroot00000000000000/* Simple image viewer that opens an image using SDL2_image and presents it * to the screen. 
* * License: CC0 / Public Domain */ #include #include "common.h" #include "window.h" #include #include #include // Static configuration, done in the file to keep things simple static const char *icc_profile = ""; // path to ICC profile static const char *lut_file = ""; // path to .cube lut // Program state static pl_log logger; static struct window *win; // For rendering static pl_tex img_tex; static pl_tex osd_tex; static struct pl_plane img_plane; static struct pl_plane osd_plane; static pl_renderer renderer; static struct pl_custom_lut *lut; struct file { void *data; size_t size; }; static struct file icc_file; static bool open_file(const char *path, struct file *out) { if (!path || !path[0]) { *out = (struct file) {0}; return true; } FILE *fp = NULL; bool success = false; fp = fopen(path, "rb"); if (!fp) goto done; if (fseeko(fp, 0, SEEK_END)) goto done; off_t size = ftello(fp); if (size < 0) goto done; if (fseeko(fp, 0, SEEK_SET)) goto done; void *data = malloc(size); if (!fread(data, size, 1, fp)) goto done; *out = (struct file) { .data = data, .size = size, }; success = true; done: if (fp) fclose(fp); return success; } static void close_file(struct file *file) { if (!file->data) return; free(file->data); *file = (struct file) {0}; } static void uninit(int ret) { pl_renderer_destroy(&renderer); pl_tex_destroy(win->gpu, &img_tex); pl_tex_destroy(win->gpu, &osd_tex); close_file(&icc_file); pl_lut_free(&lut); window_destroy(&win); pl_log_destroy(&logger); exit(ret); } static bool upload_plane(const SDL_Surface *img, pl_tex *tex, struct pl_plane *plane) { if (!img) return false; SDL_Surface *fixed = NULL; const SDL_PixelFormat *fmt = img->format; if (SDL_ISPIXELFORMAT_INDEXED(fmt->format)) { // libplacebo doesn't handle indexed formats yet fixed = SDL_CreateRGBSurfaceWithFormat(0, img->w, img->h, 32, SDL_PIXELFORMAT_ABGR8888); SDL_BlitSurface((SDL_Surface *) img, NULL, fixed, NULL); img = fixed; fmt = img->format; } struct pl_plane_data data = { .type = PL_FMT_UNORM, .width = img->w, .height = img->h, .pixel_stride = fmt->BytesPerPixel, .row_stride = img->pitch, .pixels = img->pixels, }; uint64_t masks[4] = { fmt->Rmask, fmt->Gmask, fmt->Bmask, fmt->Amask }; pl_plane_data_from_mask(&data, masks); bool ok = pl_upload_plane(win->gpu, plane, tex, &data); SDL_FreeSurface(fixed); return ok; } static bool render_frame(const struct pl_swapchain_frame *frame) { pl_tex img = img_plane.texture; struct pl_frame image = { .num_planes = 1, .planes = { img_plane }, .repr = pl_color_repr_unknown, .color = pl_color_space_unknown, .crop = {0, 0, img->params.w, img->params.h}, }; // This seems to be the case for SDL2_image image.repr.alpha = PL_ALPHA_INDEPENDENT; struct pl_frame target; pl_frame_from_swapchain(&target, frame); target.profile = (struct pl_icc_profile) { .data = icc_file.data, .len = icc_file.size, }; pl_rect2df_aspect_copy(&target.crop, &image.crop, 0.0); struct pl_overlay osd; struct pl_overlay_part osd_part; if (osd_tex) { osd_part = (struct pl_overlay_part) { .src = { 0, 0, osd_tex->params.w, osd_tex->params.h }, .dst = { 0, 0, osd_tex->params.w, osd_tex->params.h }, }; osd = (struct pl_overlay) { .tex = osd_tex, .mode = PL_OVERLAY_NORMAL, .repr = image.repr, .color = image.color, .parts = &osd_part, .num_parts = 1, }; target.overlays = &osd; target.num_overlays = 1; if (frame->flipped) { osd_part.dst.y0 = frame->fbo->params.h - osd_part.dst.y0; osd_part.dst.y1 = frame->fbo->params.h - osd_part.dst.y1; } } // Use the heaviest preset purely for demonstration/testing purposes struct 
pl_render_params params = pl_render_high_quality_params; params.lut = lut; return pl_render_image(renderer, &image, &target, &params); } int main(int argc, char **argv) { if (argc < 2 || argc > 3) { fprintf(stderr, "Usage: %s <image> [<overlay>]\n", argv[0]); return 255; } const char *file = argv[1]; const char *overlay = argc > 2 ? argv[2] : NULL; logger = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_color, .log_level = PL_LOG_INFO, )); // Load image, do this first so we can use it for the window size SDL_Surface *img = IMG_Load(file); if (!img) { fprintf(stderr, "Failed loading '%s': %s\n", file, SDL_GetError()); uninit(1); } // Create window unsigned int start = SDL_GetTicks(); win = window_create(logger, &(struct window_params) { .title = "SDL2_image demo", .width = img->w, .height = img->h, }); if (!win) uninit(1); // Initialize rendering state if (!upload_plane(img, &img_tex, &img_plane)) { fprintf(stderr, "Failed uploading image plane!\n"); uninit(2); } SDL_FreeSurface(img); if (overlay) { SDL_Surface *osd = IMG_Load(overlay); if (!upload_plane(osd, &osd_tex, &osd_plane)) fprintf(stderr, "Failed uploading OSD plane.. continuing anyway\n"); SDL_FreeSurface(osd); } if (!open_file(icc_profile, &icc_file)) fprintf(stderr, "Failed opening ICC profile.. continuing anyway\n"); struct file lutf; if (open_file(lut_file, &lutf) && lutf.size) { if (!(lut = pl_lut_parse_cube(logger, lutf.data, lutf.size))) fprintf(stderr, "Failed parsing LUT.. continuing anyway\n"); close_file(&lutf); } renderer = pl_renderer_create(logger, win->gpu); unsigned int last = SDL_GetTicks(), frames = 0; printf("Took %u ms for initialization\n", last - start); // Render loop while (!win->window_lost) { struct pl_swapchain_frame frame; bool ok = pl_swapchain_start_frame(win->swapchain, &frame); if (!ok) { window_poll(win, true); continue; } if (!render_frame(&frame)) { fprintf(stderr, "libplacebo: Failed rendering frame!\n"); uninit(3); } ok = pl_swapchain_submit_frame(win->swapchain); if (!ok) { fprintf(stderr, "libplacebo: Failed submitting frame!\n"); uninit(3); } pl_swapchain_swap_buffers(win->swapchain); frames++; unsigned int now = SDL_GetTicks(); if (now - last > 5000) { printf("%u frames in %u ms = %f FPS\n", frames, now - last, 1000.0f * frames / (now - last)); last = now; frames = 0; } window_poll(win, false); } uninit(0); } libplacebo-v4.192.1/demos/ui.c000066400000000000000000000155121417677245700160720ustar00rootroot00000000000000#define NK_IMPLEMENTATION #include "ui.h" #include #include struct ui_vertex { float pos[2]; float coord[2]; uint8_t color[4]; }; #define NUM_VERTEX_ATTRIBS 3 struct ui { pl_gpu gpu; pl_dispatch dp; struct nk_context nk; struct nk_font_atlas atlas; struct nk_buffer cmds, verts, idx; pl_tex font_tex; struct pl_vertex_attrib attribs_pl[NUM_VERTEX_ATTRIBS]; struct nk_draw_vertex_layout_element attribs_nk[NUM_VERTEX_ATTRIBS+1]; struct nk_convert_config convert_cfg; }; struct ui *ui_create(pl_gpu gpu) { struct ui *ui = malloc(sizeof(struct ui)); if (!ui) return NULL; *ui = (struct ui) { .gpu = gpu, .dp = pl_dispatch_create(gpu->log, gpu), .attribs_pl = { { .name = "pos", .offset = offsetof(struct ui_vertex, pos), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), }, { .name = "coord", .offset = offsetof(struct ui_vertex, coord), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), }, { .name = "vcolor", .offset = offsetof(struct ui_vertex, color), .fmt = pl_find_named_fmt(gpu, "rgba8"), } }, .attribs_nk = { {NK_VERTEX_POSITION, NK_FORMAT_FLOAT, offsetof(struct ui_vertex, pos)}, {NK_VERTEX_TEXCOORD,
NK_FORMAT_FLOAT, offsetof(struct ui_vertex, coord)}, {NK_VERTEX_COLOR, NK_FORMAT_R8G8B8A8, offsetof(struct ui_vertex, color)}, {NK_VERTEX_LAYOUT_END} }, .convert_cfg = { .vertex_layout = ui->attribs_nk, .vertex_size = sizeof(struct ui_vertex), .vertex_alignment = NK_ALIGNOF(struct ui_vertex), .shape_AA = NK_ANTI_ALIASING_ON, .line_AA = NK_ANTI_ALIASING_ON, .circle_segment_count = 22, .curve_segment_count = 22, .arc_segment_count = 22, .global_alpha = 1.0f, }, }; // Initialize font atlas using built-in font nk_font_atlas_init_default(&ui->atlas); nk_font_atlas_begin(&ui->atlas); struct nk_font *font = nk_font_atlas_add_default(&ui->atlas, 20, NULL); struct pl_tex_params tparams = { .format = pl_find_named_fmt(gpu, "r8"), .sampleable = true, .initial_data = nk_font_atlas_bake(&ui->atlas, &tparams.w, &tparams.h, NK_FONT_ATLAS_ALPHA8), .debug_tag = PL_DEBUG_TAG, }; ui->font_tex = pl_tex_create(gpu, &tparams); nk_font_atlas_end(&ui->atlas, nk_handle_ptr((void *) ui->font_tex), &ui->convert_cfg.null); nk_font_atlas_cleanup(&ui->atlas); if (!ui->font_tex) goto error; // Initialize nuklear state if (!nk_init_default(&ui->nk, &font->handle)) { fprintf(stderr, "NK: failed initializing UI!\n"); goto error; } nk_buffer_init_default(&ui->cmds); nk_buffer_init_default(&ui->verts); nk_buffer_init_default(&ui->idx); return ui; error: ui_destroy(&ui); return NULL; } void ui_destroy(struct ui **ptr) { struct ui *ui = *ptr; if (!ui) return; nk_buffer_free(&ui->cmds); nk_buffer_free(&ui->verts); nk_buffer_free(&ui->idx); nk_free(&ui->nk); nk_font_atlas_clear(&ui->atlas); pl_tex_destroy(ui->gpu, &ui->font_tex); pl_dispatch_destroy(&ui->dp); free(ui); *ptr = NULL; } void ui_update_input(struct ui *ui, const struct window *win) { int x, y; window_get_cursor(win, &x, &y); nk_input_begin(&ui->nk); nk_input_motion(&ui->nk, x, y); nk_input_button(&ui->nk, NK_BUTTON_LEFT, x, y, window_get_button(win, BTN_LEFT)); nk_input_button(&ui->nk, NK_BUTTON_RIGHT, x, y, window_get_button(win, BTN_RIGHT)); nk_input_button(&ui->nk, NK_BUTTON_MIDDLE, x, y, window_get_button(win, BTN_MIDDLE)); struct nk_vec2 scroll; window_get_scroll(win, &scroll.x, &scroll.y); nk_input_scroll(&ui->nk, scroll); nk_input_end(&ui->nk); } struct nk_context *ui_get_context(struct ui *ui) { return &ui->nk; } bool ui_draw(struct ui *ui, const struct pl_swapchain_frame *frame) { if (nk_convert(&ui->nk, &ui->cmds, &ui->verts, &ui->idx, &ui->convert_cfg) != NK_CONVERT_SUCCESS) { fprintf(stderr, "NK: failed converting draw commands!\n"); return false; } const struct nk_draw_command *cmd = NULL; const uint8_t *vertices = nk_buffer_memory(&ui->verts); const nk_draw_index *indices = nk_buffer_memory(&ui->idx); nk_draw_foreach(cmd, &ui->nk, &ui->cmds) { if (!cmd->elem_count) continue; pl_shader sh = pl_dispatch_begin(ui->dp); pl_shader_custom(sh, &(struct pl_custom_shader) { .description = "nuklear UI", .body = (ui->gpu->glsl.version >= 130) ? 
"color = texture(ui_tex, coord).r * vcolor;" : "color = texture2D(ui_tex, coord).r * vcolor;", .output = PL_SHADER_SIG_COLOR, .num_descriptors = 1, .descriptors = &(struct pl_shader_desc) { .desc = { .name = "ui_tex", .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = cmd->texture.ptr, .sample_mode = PL_TEX_SAMPLE_NEAREST, }, }, }); struct pl_color_repr repr = frame->color_repr; pl_shader_color_map(sh, NULL, pl_color_space_srgb, frame->color_space, NULL, false); pl_shader_encode_color(sh, &repr); bool ok = pl_dispatch_vertex(ui->dp, pl_dispatch_vertex_params( .shader = &sh, .target = frame->fbo, .blend_params = &pl_alpha_overlay, .scissors = { .x0 = cmd->clip_rect.x, .y0 = cmd->clip_rect.y, .x1 = cmd->clip_rect.x + cmd->clip_rect.w, .y1 = cmd->clip_rect.y + cmd->clip_rect.h, }, .vertex_attribs = ui->attribs_pl, .num_vertex_attribs = NUM_VERTEX_ATTRIBS, .vertex_stride = sizeof(struct ui_vertex), .vertex_position_idx = 0, .vertex_coords = PL_COORDS_ABSOLUTE, .vertex_flipped = frame->flipped, .vertex_type = PL_PRIM_TRIANGLE_LIST, .vertex_count = cmd->elem_count, .vertex_data = vertices, .index_data = indices, .index_fmt = PL_INDEX_UINT32, )); if (!ok) { fprintf(stderr, "placebo: failed rendering UI!\n"); return false; } indices += cmd->elem_count; } nk_clear(&ui->nk); nk_buffer_clear(&ui->cmds); nk_buffer_clear(&ui->verts); nk_buffer_clear(&ui->idx); return true; } libplacebo-v4.192.1/demos/ui.h000066400000000000000000000033131417677245700160730ustar00rootroot00000000000000#pragma once #define NK_INCLUDE_FIXED_TYPES #define NK_INCLUDE_DEFAULT_ALLOCATOR #define NK_INCLUDE_STANDARD_BOOL #define NK_INCLUDE_STANDARD_VARARGS #define NK_INCLUDE_VERTEX_BUFFER_OUTPUT #define NK_INCLUDE_FONT_BAKING #define NK_INCLUDE_DEFAULT_FONT #define NK_BUTTON_TRIGGER_ON_RELEASE #define NK_UINT_DRAW_INDEX #include #include "common.h" #include "window.h" struct ui; struct ui *ui_create(pl_gpu gpu); void ui_destroy(struct ui **ui); // Update/Logic/Draw cycle void ui_update_input(struct ui *ui, const struct window *window); struct nk_context *ui_get_context(struct ui *ui); bool ui_draw(struct ui *ui, const struct pl_swapchain_frame *frame); // Helper function to draw a custom widget for drag&drop operations, returns // true if the widget is hovered static inline bool ui_widget_hover(struct nk_context *nk, const char *label) { struct nk_rect bounds; if (!nk_widget(&bounds, nk)) return false; struct nk_command_buffer *canvas = nk_window_get_canvas(nk); bool hover = nk_input_is_mouse_hovering_rect(&nk->input, bounds); float h, s, v; nk_color_hsv_f(&h, &s, &v, nk->style.window.background); struct nk_color background = nk_hsv_f(h, s, v + (hover ? 
0.1f : -0.02f)); struct nk_color border = nk_hsv_f(h, s, v + 0.20f); nk_fill_rect(canvas, bounds, 0.0f, background); nk_stroke_rect(canvas, bounds, 0.0f, 2.0f, border); const float pad = 10.0f; struct nk_rect text = { .x = bounds.x + pad, .y = bounds.y + pad, .w = bounds.w - 2 * pad, .h = bounds.h - 2 * pad, }; nk_draw_text(canvas, text, label, nk_strlen(label), nk->style.font, background, nk->style.text.color); return hover; } libplacebo-v4.192.1/demos/utils.c000066400000000000000000000007541417677245700166170ustar00rootroot00000000000000#include "utils.h" #ifdef _WIN32 #include #else #include #endif bool utils_gettime(double *pTime) { #ifdef _WIN32 LARGE_INTEGER frequency, ts; QueryPerformanceFrequency(&frequency); QueryPerformanceCounter(&ts); *pTime = (double)ts.QuadPart / frequency.QuadPart; return true; #else struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) return false; *pTime = ts.tv_sec + ts.tv_nsec * 1e-9; return true; #endif } libplacebo-v4.192.1/demos/utils.h000066400000000000000000000001561417677245700166200ustar00rootroot00000000000000#pragma once #include "common.h" // Get the current tick time in seconds bool utils_gettime(double *pTime); libplacebo-v4.192.1/demos/video-filtering.c000066400000000000000000000652611417677245700205520ustar00rootroot00000000000000/* Presented are two hypothetical scenarios of how one might use libplacebo * as something like an FFmpeg or mpv video filter. We examine two example * APIs (loosely modeled after real video filtering APIs) and how each style * would like to use libplacebo. * * For sake of a simple example, let's assume this is a debanding filter. * For those of you too lazy to compile/run this file but still want to see * results, these are from my machine (RX 5700 XT + 1950X, as of 2020-05-25): * * RADV+ACO: * api1: 10000 frames in 16.328440 s => 1.632844 ms/frame (612.43 fps) * render: 0.113524 ms, upload: 0.127551 ms, download: 0.146097 ms * api2: 10000 frames in 5.335634 s => 0.533563 ms/frame (1874.19 fps) * render: 0.064378 ms, upload: 0.000000 ms, download: 0.189719 ms * * AMDVLK: * api1: 10000 frames in 14.921859 s => 1.492186 ms/frame (670.16 fps) * render: 0.110603 ms, upload: 0.114412 ms, download: 0.115375 ms * api2: 10000 frames in 4.667386 s => 0.466739 ms/frame (2142.53 fps) * render: 0.030781 ms, upload: 0.000000 ms, download: 0.075237 ms * * You can see that AMDVLK is still better at doing texture streaming than * RADV - this is because as of writing RADV still does not support * asynchronous texture queues / DMA engine transfers. If we disable the * `async_transfer` option with AMDVLK we get this: * * api1: 10000 frames in 16.087723 s => 1.608772 ms/frame (621.59 fps) * render: 0.111154 ms, upload: 0.122476 ms, download: 0.133162 ms * api2: 10000 frames in 6.344959 s => 0.634496 ms/frame (1576.05 fps) * render: 0.031307 ms, upload: 0.000000 ms, download: 0.083520 ms * * License: CC0 / Public Domain */ #include #include #include #include #include #include #include #include "common.h" #ifdef _WIN32 #include #endif #include #include #include #include /////////////////////// /// API definitions /// /////////////////////// // Stuff that would be common to each API void *init(void); void uninit(void *priv); struct format { // For simplicity let's make a few assumptions here, since configuring the // texture format is not the point of this example. 
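    // For instance, under these assumptions the 8-bit NV12 layout used by the
    // benchmark at the bottom of this file maps onto this struct as
    // { .num_comps = 1, .bitdepth = 8 } for the luma plane and
    // { .num_comps = 2, .bitdepth = 8 } for the interleaved chroma plane
    // (see `example_image` below).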
(In practice you can // go nuts with the `utils/upload.h` helpers) // // - All formats contain unsigned integers only // - All components have the same size in bits // - All components are in the "canonical" order // - All formats have power of two sizes only (2 or 4 components, not 3) // - All plane strides are a multiple of the pixel size int num_comps; int bitdepth; }; struct plane { int subx, suby; // subsampling shift struct format fmt; size_t stride; void *data; }; #define MAX_PLANES 4 struct image { int width, height; int num_planes; struct plane planes[MAX_PLANES]; // For API #2, the associated mapped buffer (if any) struct api2_buf *associated_buf; }; // Example API design #1: synchronous, blocking, double-copy (bad!) // // In this API, `api1_filter` must immediately return with the new data. // This prevents parallelism on the GPU and should be avoided if possible, // but sometimes that's what you have to work with. So this is what it // would look like. // // Also, let's assume this API design reconfigures the filter chain (using // a blank `proxy` image every time the image format or dimensions change, // and doesn't expect us to fail due to format mismatches or resource // exhaustion afterwards. bool api1_reconfig(void *priv, const struct image *proxy); bool api1_filter(void *priv, struct image *dst, struct image *src); // Example API design #2: asynchronous, streaming, queued, zero-copy (good!) // // In this API, `api2_process` will run by the calling code every so often // (e.g. when new data is available or expected). This function has access // to non-blocking functions `get_image` and `put_image` that interface // with the video filtering engine's internal queueing system. // // This API is also designed to feed multiple frames ahead of time, i.e. // it will feed us as many frames as it can while we're still returning // `API2_WANT_MORE`. To drain the filter chain, it would continue running // the process function until `API2_HAVE_MORE` is no longer present // in the output. // // This API is also designed to do zero-copy where possible. When it wants // to create a data buffer of a given size, it will call our function // `api2_alloc` which will return a buffer that we can process directly. // We can use this to do zero-copy uploading to the GPU, by creating // host-visible persistently mapped buffers. In order to prevent the video // filtering system from re-using our buffers while copies are happening, we // use special functions `image_lock` and `image_unlock` to increase a // refcount on the image's backing storage. (As is typical of such APIs) // // Finally, this API is designed to be fully dynamic: The image parameters // could change at any time, and we must be equipped to handle that. enum api2_status { // Negative values are used to signal error conditions API2_ERR_FMT = -2, // incompatible / unsupported format API2_ERR_UNKNOWN = -1, // some other error happened API2_OK = 0, // no error, no status - everything's good // Positive values represent a mask of status conditions API2_WANT_MORE = (1 << 0), // we want more frames, please feed some more! API2_HAVE_MORE = (1 << 1), // we have more frames but they're not ready }; enum api2_status api2_process(void *priv); // Functions for creating persistently mapped buffers struct api2_buf { void *data; size_t size; void *priv; }; bool api2_alloc(void *priv, size_t size, struct api2_buf *out); void api2_free(void *priv, const struct api2_buf *buf); // These functions are provided by the API. 
The exact details of how images // are enqueued, dequeued and locked are not really important here, so just // do something unrealistic but simple to demonstrate with. struct image *get_image(void); void put_image(struct image *img); void image_lock(struct image *img); void image_unlock(struct image *img); ///////////////////////////////// /// libplacebo implementation /// ///////////////////////////////// // For API #2: #define PARALLELISM 8 struct entry { pl_buf buf; // to stream the download pl_tex tex_in[MAX_PLANES]; pl_tex tex_out[MAX_PLANES]; struct image image; // For entries that are associated with a held image, so we can unlock them // as soon as possible struct image *held_image; pl_buf held_buf; }; // For both APIs: struct priv { pl_log log; pl_vulkan vk; pl_gpu gpu; pl_dispatch dp; pl_shader_obj dither_state; // Timer objects pl_timer render_timer; pl_timer upload_timer; pl_timer download_timer; uint64_t render_sum; uint64_t upload_sum; uint64_t download_sum; int render_count; int upload_count; int download_count; // API #1: A simple pair of input and output textures pl_tex tex_in[MAX_PLANES]; pl_tex tex_out[MAX_PLANES]; // API #2: A ring buffer of textures/buffers for streaming int idx_in; // points the next free entry int idx_out; // points to the first entry still in progress struct entry entries[PARALLELISM]; }; void *init(void) { struct priv *p = calloc(1, sizeof(struct priv)); if (!p) return NULL; p->log = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_simple, .log_level = PL_LOG_WARN, )); p->vk = pl_vulkan_create(p->log, pl_vulkan_params( // Note: This is for API #2. In API #1 you could just pass params=NULL // and it wouldn't really matter much. .async_transfer = true, .async_compute = true, .queue_count = PARALLELISM, )); if (!p->vk) { fprintf(stderr, "Failed creating vulkan context\n"); goto error; } // Give this a shorter name for convenience p->gpu = p->vk->gpu; p->dp = pl_dispatch_create(p->log, p->gpu); if (!p->dp) { fprintf(stderr, "Failed creating shader dispatch object\n"); goto error; } p->render_timer = pl_timer_create(p->gpu); p->upload_timer = pl_timer_create(p->gpu); p->download_timer = pl_timer_create(p->gpu); return p; error: uninit(p); return NULL; } void uninit(void *priv) { struct priv *p = priv; // API #1 for (int i = 0; i < MAX_PLANES; i++) { pl_tex_destroy(p->gpu, &p->tex_in[i]); pl_tex_destroy(p->gpu, &p->tex_out[i]); } // API #2 for (int i = 0; i < PARALLELISM; i++) { pl_buf_destroy(p->gpu, &p->entries[i].buf); for (int j = 0; j < MAX_PLANES; j++) { pl_tex_destroy(p->gpu, &p->entries[i].tex_in[j]); pl_tex_destroy(p->gpu, &p->entries[i].tex_out[j]); } if (p->entries[i].held_image) image_unlock(p->entries[i].held_image); } pl_timer_destroy(p->gpu, &p->render_timer); pl_timer_destroy(p->gpu, &p->upload_timer); pl_timer_destroy(p->gpu, &p->download_timer); pl_shader_obj_destroy(&p->dither_state); pl_dispatch_destroy(&p->dp); pl_vulkan_destroy(&p->vk); pl_log_destroy(&p->log); free(p); } // Helper function to set up the `pl_plane_data` struct from the image params static void setup_plane_data(const struct image *img, struct pl_plane_data out[MAX_PLANES]) { for (int i = 0; i < img->num_planes; i++) { const struct plane *plane = &img->planes[i]; out[i] = (struct pl_plane_data) { .type = PL_FMT_UNORM, .width = img->width >> plane->subx, .height = img->height >> plane->suby, .pixel_stride = plane->fmt.num_comps * plane->fmt.bitdepth / 8, .row_stride = plane->stride, .pixels = plane->data, }; // For API 2 (direct rendering) if 
(img->associated_buf) { pl_buf buf = img->associated_buf->priv; out[i].pixels = NULL; out[i].buf = buf; out[i].buf_offset = (uintptr_t) plane->data - (uintptr_t) buf->data; } for (int c = 0; c < plane->fmt.num_comps; c++) { out[i].component_size[c] = plane->fmt.bitdepth; out[i].component_pad[c] = 0; out[i].component_map[c] = c; } } } static bool do_plane(struct priv *p, pl_tex dst, pl_tex src) { int new_depth = dst->params.format->component_depth[0]; // Do some debanding, and then also make sure to dither to the new depth // so that our debanded gradients are actually preserved well pl_shader sh = pl_dispatch_begin(p->dp); pl_shader_deband(sh, pl_sample_src( .tex = src ), NULL); pl_shader_dither(sh, new_depth, &p->dither_state, NULL); return pl_dispatch_finish(p->dp, pl_dispatch_params( .shader = &sh, .target = dst, .timer = p->render_timer, )); } static void check_timers(struct priv *p) { uint64_t ret; while ((ret = pl_timer_query(p->gpu, p->render_timer))) { p->render_sum += ret; p->render_count++; } while ((ret = pl_timer_query(p->gpu, p->upload_timer))) { p->upload_sum += ret; p->upload_count++; } while ((ret = pl_timer_query(p->gpu, p->download_timer))) { p->download_sum += ret; p->download_count++; } } // API #1 implementation: // // In this design, we will create all GPU resources inside `reconfig`, based on // the texture format configured from the proxy image. This will avoid failing // later on due to e.g. resource exhaustion or texture format mismatch, and // thereby falls within the intended semantics of this style of API. bool api1_reconfig(void *priv, const struct image *proxy) { struct priv *p = priv; struct pl_plane_data data[MAX_PLANES]; setup_plane_data(proxy, data); for (int i = 0; i < proxy->num_planes; i++) { pl_fmt fmt = pl_plane_find_fmt(p->gpu, NULL, &data[i]); if (!fmt) { fprintf(stderr, "Failed configuring filter: no good texture format!\n"); return false; } bool ok = true; ok &= pl_tex_recreate(p->gpu, &p->tex_in[i], pl_tex_params( .w = data[i].width, .h = data[i].height, .format = fmt, .sampleable = true, .host_writable = true, )); ok &= pl_tex_recreate(p->gpu, &p->tex_out[i], pl_tex_params( .w = data[i].width, .h = data[i].height, .format = fmt, .renderable = true, .host_readable = true, )); if (!ok) { fprintf(stderr, "Failed creating GPU textures!\n"); return false; } } return true; } bool api1_filter(void *priv, struct image *dst, struct image *src) { struct priv *p = priv; struct pl_plane_data data[MAX_PLANES]; setup_plane_data(src, data); // Upload planes for (int i = 0; i < src->num_planes; i++) { bool ok = pl_tex_upload(p->gpu, pl_tex_transfer_params( .tex = p->tex_in[i], .row_pitch = data[i].row_stride, .ptr = src->planes[i].data, .timer = p->upload_timer, )); if (!ok) { fprintf(stderr, "Failed uploading data to the GPU!\n"); return false; } } // Process planes for (int i = 0; i < src->num_planes; i++) { if (!do_plane(p, p->tex_out[i], p->tex_in[i])) { fprintf(stderr, "Failed processing planes!\n"); return false; } } // Download planes for (int i = 0; i < src->num_planes; i++) { bool ok = pl_tex_download(p->gpu, pl_tex_transfer_params( .tex = p->tex_out[i], .row_pitch = dst->planes[i].stride, .ptr = dst->planes[i].data, .timer = p->download_timer, )); if (!ok) { fprintf(stderr, "Failed downloading data from the GPU!\n"); return false; } } check_timers(p); return true; } // API #2 implementation: // // In this implementation we maintain a queue (implemented as ring buffer) // of "work entries", which are isolated structs that hold independent GPU // 
resources - so that the GPU has no cross-entry dependencies on any of the // textures or other resources. (Side note: It still has a dependency on the // dither state, but this is just a shared LUT anyway) // Align up to the nearest multiple of a power of two #define ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) static enum api2_status submit_work(struct priv *p, struct entry *e, struct image *img) { // If the image comes from a mapped buffer, we have to take a lock // while our upload is in progress if (img->associated_buf) { assert(!e->held_image); image_lock(img); e->held_image = img; e->held_buf = img->associated_buf->priv; } // Upload this image's data struct pl_plane_data data[MAX_PLANES]; setup_plane_data(img, data); for (int i = 0; i < img->num_planes; i++) { pl_fmt fmt = pl_plane_find_fmt(p->gpu, NULL, &data[i]); if (!fmt) return API2_ERR_FMT; // FIXME: can we plumb a `pl_timer` in here somehow? if (!pl_upload_plane(p->gpu, NULL, &e->tex_in[i], &data[i])) return API2_ERR_UNKNOWN; // Re-create the target FBO as well with this format if necessary bool ok = pl_tex_recreate(p->gpu, &e->tex_out[i], pl_tex_params( .w = data[i].width, .h = data[i].height, .format = fmt, .renderable = true, .host_readable = true, )); if (!ok) return API2_ERR_UNKNOWN; } // Dispatch the work for this image for (int i = 0; i < img->num_planes; i++) { if (!do_plane(p, e->tex_out[i], e->tex_in[i])) return API2_ERR_UNKNOWN; } // Set up the resulting `struct image` that will hold our target // data. We just copy the format etc. from the source image memcpy(&e->image, img, sizeof(struct image)); size_t offset[MAX_PLANES], stride[MAX_PLANES], total_size = 0; for (int i = 0; i < img->num_planes; i++) { // For performance, we want to make sure we align the stride // to a multiple of the GPU's preferred texture transfer stride // (This is entirely optional) stride[i] = ALIGN2(img->planes[i].stride, p->gpu->limits.align_tex_xfer_pitch); int height = img->height >> img->planes[i].suby; // Round up the offset to the nearest multiple of the optimal // transfer alignment. (This is also entirely optional) offset[i] = ALIGN2(total_size, p->gpu->limits.align_tex_xfer_offset); total_size = offset[i] + stride[i] * height; } // Dispatch the asynchronous download into a mapped buffer bool ok = pl_buf_recreate(p->gpu, &e->buf, pl_buf_params( .size = total_size, .host_mapped = true, )); if (!ok) return API2_ERR_UNKNOWN; for (int i = 0; i < img->num_planes; i++) { ok = pl_tex_download(p->gpu, pl_tex_transfer_params( .tex = e->tex_out[i], .row_pitch = stride[i], .buf = e->buf, .buf_offset = offset[i], .timer = p->download_timer, )); if (!ok) return API2_ERR_UNKNOWN; // Update the output fields e->image.planes[i].data = e->buf->data + offset[i]; e->image.planes[i].stride = stride[i]; } // Make sure this work starts processing in the background, and especially // so we can move on to the next queue on the gPU pl_gpu_flush(p->gpu); return API2_OK; } enum api2_status api2_process(void *priv) { struct priv *p = priv; enum api2_status ret = 0; // Opportunistically release any held images. 
We do this across the ring // buffer, rather than doing this as part of the following loop, because // we want to release images ahead-of-time (no FIFO constraints) for (int i = 0; i < PARALLELISM; i++) { struct entry *e = &p->entries[i]; if (e->held_image && !pl_buf_poll(p->gpu, e->held_buf, 0)) { // upload buffer is no longer in use, release it image_unlock(e->held_image); e->held_image = NULL; e->held_buf = NULL; } } // Poll the status of existing entries and dequeue the ones that are done while (p->idx_out != p->idx_in) { struct entry *e = &p->entries[p->idx_out]; if (pl_buf_poll(p->gpu, e->buf, 0)) break; if (e->held_image) { image_unlock(e->held_image); e->held_image = NULL; e->held_buf = NULL; } // download buffer is no longer busy, dequeue the frame put_image(&e->image); p->idx_out = (p->idx_out + 1) % PARALLELISM; } // Fill up the queue with more work int last_free_idx = (p->idx_out ? p->idx_out : PARALLELISM) - 1; while (p->idx_in != last_free_idx) { struct image *img = get_image(); if (!img) { ret |= API2_WANT_MORE; break; } enum api2_status err = submit_work(p, &p->entries[p->idx_in], img); if (err < 0) return err; p->idx_in = (p->idx_in + 1) % PARALLELISM; } if (p->idx_out != p->idx_in) ret |= API2_HAVE_MORE; return ret; } bool api2_alloc(void *priv, size_t size, struct api2_buf *out) { struct priv *p = priv; if (!p->gpu->limits.buf_transfer || size > p->gpu->limits.max_mapped_size) return false; pl_buf buf = pl_buf_create(p->gpu, pl_buf_params( .size = size, .host_mapped = true, )); if (!buf) return false; *out = (struct api2_buf) { .data = buf->data, .size = size, .priv = (void *) buf, }; return true; } void api2_free(void *priv, const struct api2_buf *buf) { struct priv *p = priv; pl_buf plbuf = buf->priv; pl_buf_destroy(p->gpu, &plbuf); } //////////////////////////////////// /// Proof of Concept / Benchmark /// //////////////////////////////////// #define FRAMES 10000 // Let's say we're processing a 1920x1080 4:2:0 8-bit NV12 video, arbitrarily // with a stride aligned to 256 bytes. (For no particular reason) #define TEXELSZ sizeof(uint8_t) #define WIDTH 1920 #define HEIGHT 1080 #define STRIDE (ALIGN2(WIDTH, 256) * TEXELSZ) // Subsampled planes #define SWIDTH (WIDTH >> 1) #define SHEIGHT (HEIGHT >> 1) #define SSTRIDE (ALIGN2(SWIDTH, 256) * TEXELSZ) // Plane offsets / sizes #define SIZE0 (HEIGHT * STRIDE) #define SIZE1 (2 * SHEIGHT * SSTRIDE) #define OFFSET0 0 #define OFFSET1 SIZE0 #define BUFSIZE (OFFSET1 + SIZE1) // Skeleton of an example image static const struct image example_image = { .width = WIDTH, .height = HEIGHT, .num_planes = 2, .planes = { { .subx = 0, .suby = 0, .stride = STRIDE, .fmt = { .num_comps = 1, .bitdepth = 8 * TEXELSZ, }, }, { .subx = 1, .suby = 1, .stride = SSTRIDE * 2, .fmt = { .num_comps = 2, .bitdepth = 8 * TEXELSZ, }, }, }, }; // API #1: Nice and simple (but slow) static void api1_example(void) { struct priv *vf = init(); if (!vf) return; if (!api1_reconfig(vf, &example_image)) { fprintf(stderr, "api1: Failed configuring video filter!\n"); return; } // Allocate two buffers to hold the example data, and fill the source // buffer arbitrarily with a "simple" pattern. 
(Decoding the data into // the buffer is not meant to be part of this benchmark) uint8_t *srcbuf = malloc(BUFSIZE), *dstbuf = malloc(BUFSIZE); if (!srcbuf || !dstbuf) goto done; for (size_t i = 0; i < BUFSIZE; i++) srcbuf[i] = i; struct image src = example_image, dst = example_image; src.planes[0].data = srcbuf + OFFSET0; src.planes[1].data = srcbuf + OFFSET1; dst.planes[0].data = dstbuf + OFFSET0; dst.planes[1].data = dstbuf + OFFSET1; struct timeval start = {0}, stop = {0}; gettimeofday(&start, NULL); // Process this dummy frame a bunch of times unsigned frames = 0; for (frames = 0; frames < FRAMES; frames++) { if (!api1_filter(vf, &dst, &src)) { fprintf(stderr, "api1: Failed filtering frame... aborting\n"); break; } } gettimeofday(&stop, NULL); float secs = (float) (stop.tv_sec - start.tv_sec) + 1e-6 * (stop.tv_usec - start.tv_usec); printf("api1: %4u frames in %1.6f s => %2.6f ms/frame (%5.2f fps)\n", frames, secs, 1000 * secs / frames, frames / secs); if (vf->render_count) { printf(" render: %f ms, upload: %f ms, download: %f ms\n", 1e-6 * vf->render_sum / vf->render_count, vf->upload_count ? (1e-6 * vf->upload_sum / vf->upload_count) : 0.0, vf->download_count ? (1e-6 * vf->download_sum / vf->download_count) : 0.0); } done: free(srcbuf); free(dstbuf); uninit(vf); } // API #2: Pretend we have some fancy pool of images. #define POOLSIZE (PARALLELISM + 1) static struct api2_buf buffers[POOLSIZE] = {0}; static struct image images[POOLSIZE] = {0}; static int refcount[POOLSIZE] = {0}; static unsigned api2_frames_in = 0; static unsigned api2_frames_out = 0; static void api2_example(void) { struct priv *vf = init(); if (!vf) return; // Set up a bunch of dummy images for (int i = 0; i < POOLSIZE; i++) { uint8_t *data; images[i] = example_image; if (api2_alloc(vf, BUFSIZE, &buffers[i])) { data = buffers[i].data; images[i].associated_buf = &buffers[i]; } else { // Fall back in case mapped buffers are unsupported fprintf(stderr, "warning: falling back to malloc, may be slow\n"); data = malloc(BUFSIZE); } // Fill with some "data" (like in API #1) for (size_t n = 0; n < BUFSIZE; n++) data[i] = n; images[i].planes[0].data = data + OFFSET0; images[i].planes[1].data = data + OFFSET1; } struct timeval start = {0}, stop = {0}; gettimeofday(&start, NULL); // Just keep driving the event loop regardless of the return status // until we reach the critical number of frames. (Good enough for this PoC) while (api2_frames_out < FRAMES) { enum api2_status ret = api2_process(vf); if (ret < 0) { fprintf(stderr, "api2: Failed processing... aborting\n"); break; } // Sleep a short time (100us) to prevent busy waiting the CPU #ifdef _WIN32 Sleep(0); #else nanosleep(&(struct timespec) { .tv_nsec = 100000 }, NULL); #endif check_timers(vf); } gettimeofday(&stop, NULL); float secs = (float) (stop.tv_sec - start.tv_sec) + 1e-6 * (stop.tv_usec - start.tv_usec); printf("api2: %4u frames in %1.6f s => %2.6f ms/frame (%5.2f fps)\n", api2_frames_out, secs, 1000 * secs / api2_frames_out, api2_frames_out / secs); if (vf->render_count) { printf(" render: %f ms, upload: %f ms, download: %f ms\n", 1e-6 * vf->render_sum / vf->render_count, vf->upload_count ? (1e-6 * vf->upload_sum / vf->upload_count) : 0.0, vf->download_count ? 
(1e-6 * vf->download_sum / vf->download_count) : 0.0); } for (int i = 0; i < POOLSIZE; i++) { if (images[i].associated_buf) { api2_free(vf, images[i].associated_buf); } else { // This is what we originally malloc'd free(images[i].planes[0].data); } } uninit(vf); } struct image *get_image(void) { if (api2_frames_in == FRAMES) return NULL; // simulate EOF, to avoid queueing up "extra" work // if we can find a free (unlocked) image, give it that for (int i = 0; i < POOLSIZE; i++) { if (refcount[i] == 0) { api2_frames_in++; return &images[i]; } } return NULL; // no free image available } void put_image(struct image *img) { (void)img; api2_frames_out++; } void image_lock(struct image *img) { int index = img - images; // cheat, for lack of having actual image management refcount[index]++; } void image_unlock(struct image *img) { int index = img - images; refcount[index]--; } int main(void) { printf("Running benchmarks...\n"); api1_example(); api2_example(); return 0; } libplacebo-v4.192.1/demos/volcanologo.png000066400000000000000000000103111417677245700203310ustar00rootroot00000000000000
name); struct window *win = (*impl)->create(log, params); if (win) return win; } fprintf(stderr, "No windowing system / graphical API compiled or supported!\n"); exit(1); } void window_destroy(struct window **win) { if (*win) (*win)->impl->destroy(win); } void window_poll(struct window *win, bool block) { return win->impl->poll(win, block); } void window_get_cursor(const struct window *win, int *x, int *y) { return win->impl->get_cursor(win, x, y); } void window_get_scroll(const struct window *win, float *dx, float *dy) { return win->impl->get_scroll(win, dx, dy); } bool window_get_button(const struct window *win, enum button btn) { return win->impl->get_button(win, btn); } bool window_get_key(const struct window *win, enum key key) { return win->impl->get_key(win, key); } char *window_get_file(const struct window *win) { return win->impl->get_file(win); } libplacebo-v4.192.1/demos/window.h000066400000000000000000000025641417677245700167720ustar00rootroot00000000000000// License: CC0 / Public Domain #pragma once #include struct window { const struct window_impl *impl; pl_swapchain swapchain; pl_gpu gpu; bool window_lost; }; struct window_params { const char *title; int width; int height; // initial color space struct pl_swapchain_colors colors; bool alpha; }; struct window *window_create(pl_log log, const struct window_params *params); void window_destroy(struct window **win); // Poll/wait for window events void window_poll(struct window *win, bool block); // Input handling enum button { BTN_LEFT, BTN_RIGHT, BTN_MIDDLE, }; enum key { KEY_ESC, }; void window_get_cursor(const struct window *win, int *x, int *y);
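// Minimal illustrative sketch (hypothetical helper, not used by any of the
// demos): drives a window's event loop until the user closes it, assuming a
// valid `pl_log` handle. The real demos additionally render into
// `win->swapchain` between poll calls.
static inline void window_example_run(pl_log log)
{
    struct window *win = window_create(log, &(struct window_params) {
        .title  = "example",
        .width  = 640,
        .height = 480,
    });
    if (!win)
        return;

    while (!win->window_lost)
        window_poll(win, true); // block until the next event arrives

    window_destroy(&win);
}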
void window_get_scroll(const struct window *win, float *dx, float *dy); bool window_get_button(const struct window *win, enum button); bool window_get_key(const struct window *win, enum key); char *window_get_file(const struct window *win); // For implementations struct window_impl { const char *name; __typeof__(window_create) *create; __typeof__(window_destroy) *destroy; __typeof__(window_poll) *poll; __typeof__(window_get_cursor) *get_cursor; __typeof__(window_get_scroll) *get_scroll; __typeof__(window_get_button) *get_button; __typeof__(window_get_key) *get_key; __typeof__(window_get_file) *get_file; }; libplacebo-v4.192.1/demos/window_glfw.c000066400000000000000000000262131417677245700200030ustar00rootroot00000000000000// License: CC0 / Public Domain #if defined(USE_GL) + defined(USE_VK) + defined(USE_D3D11) != 1 #error Specify exactly one of -DUSE_GL, -DUSE_VK or -DUSE_D3D11 when compiling! #endif #include #include #include "common.h" #include "window.h" #ifdef USE_VK #define VK_NO_PROTOTYPES #include #define GLFW_INCLUDE_VULKAN #define IMPL win_impl_glfw_vk #define IMPL_NAME "GLFW (vulkan)" #endif #ifdef USE_GL #include #define IMPL win_impl_glfw_gl #define IMPL_NAME "GLFW (opengl)" #endif #ifdef USE_D3D11 #include #define IMPL win_impl_glfw_d3d11 #define IMPL_NAME "GLFW (D3D11)" #endif #include #ifdef USE_D3D11 #define GLFW_EXPOSE_NATIVE_WIN32 #include #endif #ifdef NDEBUG #define DEBUG false #else #define DEBUG true #endif #define PL_ARRAY_SIZE(s) (sizeof(s) / sizeof((s)[0])) const struct window_impl IMPL; struct priv { struct window w; GLFWwindow *win; #ifdef USE_VK VkSurfaceKHR surf; pl_vulkan vk; pl_vk_inst vk_inst; #endif #ifdef USE_GL pl_opengl gl; #endif #ifdef USE_D3D11 pl_d3d11 d3d11; #endif float scroll_dx, scroll_dy; char **files; size_t files_num; size_t files_size; bool file_seen; }; static void err_cb(int code, const char *desc) { fprintf(stderr, "GLFW err %d: %s\n", code, desc); } static void close_cb(GLFWwindow *win) { struct priv *p = glfwGetWindowUserPointer(win); p->w.window_lost = true; } static void resize_cb(GLFWwindow *win, int width, int height) { struct priv *p = glfwGetWindowUserPointer(win); if (!pl_swapchain_resize(p->w.swapchain, &width, &height)) { fprintf(stderr, "libplacebo: Failed resizing swapchain? Exiting...\n"); p->w.window_lost = true; } } static void scroll_cb(GLFWwindow *win, double dx, double dy) { struct priv *p = glfwGetWindowUserPointer(win); p->scroll_dx += dx; p->scroll_dy += dy; } static void drop_cb(GLFWwindow *win, int num, const char *files[]) { struct priv *p = glfwGetWindowUserPointer(win); for (int i = 0; i < num; i++) { if (p->files_num == p->files_size) { size_t new_size = p->files_size ? 
p->files_size * 2 : 16; char **new_files = realloc(p->files, new_size * sizeof(char *)); if (!new_files) return; p->files = new_files; p->files_size = new_size; } char *file = strdup(files[i]); if (!file) return; p->files[p->files_num++] = file; } } #ifdef USE_GL static bool make_current(void *priv) { GLFWwindow *win = priv; glfwMakeContextCurrent(win); return true; } static void release_current(void *priv) { glfwMakeContextCurrent(NULL); } #endif #ifdef USE_VK static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL get_vk_proc_addr(VkInstance instance, const char* pName) { return (PFN_vkVoidFunction) glfwGetInstanceProcAddress(instance, pName); } #endif static struct window *glfw_create(pl_log log, const struct window_params *params) { struct priv *p = calloc(1, sizeof(struct priv)); if (!p) return NULL; p->w.impl = &IMPL; if (!glfwInit()) { fprintf(stderr, "GLFW: Failed initializing?\n"); goto error; } glfwSetErrorCallback(&err_cb); #ifdef USE_VK if (!glfwVulkanSupported()) { fprintf(stderr, "GLFW: No vulkan support! Perhaps recompile with -DUSE_GL\n"); goto error; } glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); #endif // USE_VK #ifdef USE_D3D11 glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); #endif // USE_D3D11 #ifdef USE_GL struct { int api; int major, minor; int glsl_ver; int profile; } gl_vers[] = { { GLFW_OPENGL_API, 4, 6, 460, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 4, 5, 450, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 4, 4, 440, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 4, 0, 400, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 3, 3, 330, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 3, 2, 150, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_ES_API, 3, 2, 320, }, { GLFW_OPENGL_API, 3, 1, 140, }, { GLFW_OPENGL_ES_API, 3, 1, 310, }, { GLFW_OPENGL_API, 3, 0, 130, }, { GLFW_OPENGL_ES_API, 3, 0, 300, }, { GLFW_OPENGL_ES_API, 2, 0, 100, }, { GLFW_OPENGL_API, 2, 1, 120, }, { GLFW_OPENGL_API, 2, 0, 110, }, }; for (int i = 0; i < PL_ARRAY_SIZE(gl_vers); i++) { glfwWindowHint(GLFW_CLIENT_API, gl_vers[i].api); glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, gl_vers[i].major); glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, gl_vers[i].minor); glfwWindowHint(GLFW_OPENGL_PROFILE, gl_vers[i].profile); #endif // USE_GL if (params->alpha) glfwWindowHint(GLFW_TRANSPARENT_FRAMEBUFFER, GLFW_TRUE); printf("Creating %dx%d window%s...\n", params->width, params->height, params->alpha ? 
" (with alpha)" : ""); p->win = glfwCreateWindow(params->width, params->height, params->title, NULL, NULL); #ifdef USE_GL if (p->win) break; } #endif // USE_GL if (!p->win) { fprintf(stderr, "GLFW: Failed creating window\n"); goto error; } // Set up GLFW event callbacks glfwSetWindowUserPointer(p->win, p); glfwSetFramebufferSizeCallback(p->win, resize_cb); glfwSetWindowCloseCallback(p->win, close_cb); glfwSetScrollCallback(p->win, scroll_cb); glfwSetDropCallback(p->win, drop_cb); #ifdef USE_VK VkResult err; uint32_t num; p->vk_inst = pl_vk_inst_create(log, pl_vk_inst_params( .get_proc_addr = get_vk_proc_addr, .debug = DEBUG, .extensions = glfwGetRequiredInstanceExtensions(&num), .num_extensions = num, )); if (!p->vk_inst) { fprintf(stderr, "libplacebo: Failed creating vulkan instance\n"); goto error; } err = glfwCreateWindowSurface(p->vk_inst->instance, p->win, NULL, &p->surf); if (err != VK_SUCCESS) { fprintf(stderr, "GLFW: Failed creating vulkan surface\n"); goto error; } p->vk = pl_vulkan_create(log, pl_vulkan_params( .instance = p->vk_inst->instance, .get_proc_addr = p->vk_inst->get_proc_addr, .surface = p->surf, .allow_software = true, )); if (!p->vk) { fprintf(stderr, "libplacebo: Failed creating vulkan device\n"); goto error; } p->w.swapchain = pl_vulkan_create_swapchain(p->vk, pl_vulkan_swapchain_params( .surface = p->surf, .present_mode = VK_PRESENT_MODE_FIFO_KHR, )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating vulkan swapchain\n"); goto error; } p->w.gpu = p->vk->gpu; #endif // USE_VK #ifdef USE_GL p->gl = pl_opengl_create(log, pl_opengl_params( .allow_software = true, .debug = DEBUG, .make_current = make_current, .release_current = release_current, .priv = p->win, )); if (!p->gl) { fprintf(stderr, "libplacebo: Failed creating opengl device\n"); goto error; } p->w.swapchain = pl_opengl_create_swapchain(p->gl, pl_opengl_swapchain_params( .swap_buffers = (void (*)(void *)) glfwSwapBuffers, .priv = p->win, )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating opengl swapchain\n"); goto error; } p->w.gpu = p->gl->gpu; #endif // USE_GL #ifdef USE_D3D11 p->d3d11 = pl_d3d11_create(log, pl_d3d11_params( .debug = DEBUG )); if (!p->d3d11) { fprintf(stderr, "libplacebo: Failed creating D3D11 device\n"); goto error; } p->w.swapchain = pl_d3d11_create_swapchain(p->d3d11, pl_d3d11_swapchain_params( .window = glfwGetWin32Window(p->win), )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating D3D11 swapchain\n"); goto error; } p->w.gpu = p->d3d11->gpu; #endif // USE_D3D11 int w = params->width, h = params->height; pl_swapchain_colorspace_hint(p->w.swapchain, ¶ms->colors); if (!pl_swapchain_resize(p->w.swapchain, &w, &h)) { fprintf(stderr, "libplacebo: Failed initializing swapchain\n"); goto error; } return &p->w; error: window_destroy((struct window **) &p); return NULL; } static void glfw_destroy(struct window **window) { struct priv *p = (struct priv *) *window; if (!p) return; pl_swapchain_destroy(&p->w.swapchain); #ifdef USE_VK pl_vulkan_destroy(&p->vk); if (p->surf) { PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR = (PFN_vkDestroySurfaceKHR) p->vk_inst->get_proc_addr(p->vk_inst->instance, "vkDestroySurfaceKHR"); vkDestroySurfaceKHR(p->vk_inst->instance, p->surf, NULL); } pl_vk_inst_destroy(&p->vk_inst); #endif #ifdef USE_GL pl_opengl_destroy(&p->gl); #endif #ifdef USE_D3D11 pl_d3d11_destroy(&p->d3d11); #endif for (int i = 0; i < p->files_num; i++) free(p->files[i]); free(p->files); glfwTerminate(); free(p); *window = NULL; } static void 
glfw_poll(struct window *window, bool block) { if (block) { glfwWaitEvents(); } else { glfwPollEvents(); } } static void glfw_get_cursor(const struct window *window, int *x, int *y) { struct priv *p = (struct priv *) window; double dx, dy; int fw, fh, ww, wh; glfwGetCursorPos(p->win, &dx, &dy); glfwGetFramebufferSize(p->win, &fw, &fh); glfwGetWindowSize(p->win, &ww, &wh); *x = floor(dx * fw / ww); *y = floor(dy * fh / wh); } static bool glfw_get_button(const struct window *window, enum button btn) { static const int button_map[] = { [BTN_LEFT] = GLFW_MOUSE_BUTTON_LEFT, [BTN_RIGHT] = GLFW_MOUSE_BUTTON_RIGHT, [BTN_MIDDLE] = GLFW_MOUSE_BUTTON_MIDDLE, }; struct priv *p = (struct priv *) window; return glfwGetMouseButton(p->win, button_map[btn]) == GLFW_PRESS; } static bool glfw_get_key(const struct window *window, enum key key) { static const int key_map[] = { [KEY_ESC] = GLFW_KEY_ESCAPE, }; struct priv *p = (struct priv *) window; return glfwGetKey(p->win, key_map[key]) == GLFW_PRESS; } static void glfw_get_scroll(const struct window *window, float *dx, float *dy) { struct priv *p = (struct priv *) window; *dx = p->scroll_dx; *dy = p->scroll_dy; p->scroll_dx = p->scroll_dy = 0.0; } static char *glfw_get_file(const struct window *window) { struct priv *p = (struct priv *) window; if (p->file_seen) { assert(p->files_num); free(p->files[0]); memmove(&p->files[0], &p->files[1], --p->files_num * sizeof(char *)); p->file_seen = false; } if (!p->files_num) return NULL; p->file_seen = true; return p->files[0]; } const struct window_impl IMPL = { .name = IMPL_NAME, .create = glfw_create, .destroy = glfw_destroy, .poll = glfw_poll, .get_cursor = glfw_get_cursor, .get_button = glfw_get_button, .get_key = glfw_get_key, .get_scroll = glfw_get_scroll, .get_file = glfw_get_file, }; libplacebo-v4.192.1/demos/window_sdl.c000066400000000000000000000207451417677245700176320ustar00rootroot00000000000000// License: CC0 / Public Domain #if !defined(USE_GL) && !defined(USE_VK) || defined(USE_GL) && defined(USE_VK) #error Specify exactly one of -DUSE_GL or -DUSE_VK when compiling! 
#endif #include #include "common.h" #include "window.h" #ifdef USE_VK #define VK_NO_PROTOTYPES #include #include #define WINFLAG_API SDL_WINDOW_VULKAN #define IMPL win_impl_sdl_vk #define IMPL_NAME "SDL2 (vulkan)" #endif #ifdef USE_GL #include #define WINFLAG_API SDL_WINDOW_OPENGL #define IMPL win_impl_sdl_gl #define IMPL_NAME "SDL2 (opengl)" #endif #ifdef NDEBUG #define DEBUG false #else #define DEBUG true #endif const struct window_impl IMPL; struct priv { struct window w; SDL_Window *win; #ifdef USE_VK VkSurfaceKHR surf; pl_vulkan vk; pl_vk_inst vk_inst; #endif #ifdef USE_GL SDL_GLContext gl_ctx; pl_opengl gl; #endif int scroll_dx, scroll_dy; char **files; size_t files_num; size_t files_size; bool file_seen; }; #ifdef USE_GL static bool make_current(void *priv) { struct priv *p = priv; return SDL_GL_MakeCurrent(p->win, p->gl_ctx) == 0; } static void release_current(void *priv) { struct priv *p = priv; SDL_GL_MakeCurrent(p->win, NULL); } #endif static struct window *sdl_create(pl_log log, const struct window_params *params) { struct priv *p = calloc(1, sizeof(struct priv)); if (!p) return NULL; p->w.impl = &IMPL; if (SDL_Init(SDL_INIT_VIDEO) < 0) { fprintf(stderr, "SDL2: Failed initializing: %s\n", SDL_GetError()); goto error; } uint32_t sdl_flags = SDL_WINDOW_SHOWN | SDL_WINDOW_RESIZABLE | WINFLAG_API; p->win = SDL_CreateWindow(params->title, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, params->width, params->height, sdl_flags); if (!p->win) { fprintf(stderr, "SDL2: Failed creating window: %s\n", SDL_GetError()); goto error; } #ifdef USE_VK unsigned int num = 0; if (!SDL_Vulkan_GetInstanceExtensions(p->win, &num, NULL)) { fprintf(stderr, "SDL2: Failed enumerating vulkan extensions: %s\n", SDL_GetError()); goto error; } const char **exts = malloc(num * sizeof(const char *)); SDL_Vulkan_GetInstanceExtensions(p->win, &num, exts); p->vk_inst = pl_vk_inst_create(log, pl_vk_inst_params( .get_proc_addr = SDL_Vulkan_GetVkGetInstanceProcAddr(), .debug = DEBUG, .extensions = exts, .num_extensions = num, )); free(exts); if (!p->vk_inst) { fprintf(stderr, "libplacebo: Failed creating vulkan instance!\n"); goto error; } if (!SDL_Vulkan_CreateSurface(p->win, p->vk_inst->instance, &p->surf)) { fprintf(stderr, "SDL2: Failed creating surface: %s\n", SDL_GetError()); goto error; } p->vk = pl_vulkan_create(log, pl_vulkan_params( .instance = p->vk_inst->instance, .get_proc_addr = p->vk_inst->get_proc_addr, .surface = p->surf, .allow_software = true, )); if (!p->vk) { fprintf(stderr, "libplacebo: Failed creating vulkan device\n"); goto error; } p->w.swapchain = pl_vulkan_create_swapchain(p->vk, pl_vulkan_swapchain_params( .surface = p->surf, .present_mode = VK_PRESENT_MODE_FIFO_KHR, )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating vulkan swapchain\n"); goto error; } p->w.gpu = p->vk->gpu; #endif // USE_VK #ifdef USE_GL p->gl_ctx = SDL_GL_CreateContext(p->win); if (!p->gl_ctx) { fprintf(stderr, "SDL2: Failed creating GL context: %s\n", SDL_GetError()); goto error; } p->gl = pl_opengl_create(log, pl_opengl_params( .allow_software = true, .debug = DEBUG, .make_current = make_current, .release_current = release_current, .priv = p, )); if (!p->gl) { fprintf(stderr, "libplacebo: Failed creating opengl device\n"); goto error; } p->w.swapchain = pl_opengl_create_swapchain(p->gl, pl_opengl_swapchain_params( .swap_buffers = (void (*)(void *)) SDL_GL_SwapWindow, .priv = p->win, )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating opengl swapchain\n"); goto error; } 
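// Expose the GPU abstraction tied to this GL context so the demos can render
// through the usual libplacebo entry points (mirrors the Vulkan path above)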
p->w.gpu = p->gl->gpu; #endif // USE_GL int w = params->width, h = params->height; pl_swapchain_colorspace_hint(p->w.swapchain, ¶ms->colors); if (!pl_swapchain_resize(p->w.swapchain, &w, &h)) { fprintf(stderr, "libplacebo: Failed initializing swapchain\n"); goto error; } return &p->w; error: window_destroy((struct window **) &p); return NULL; } static void sdl_destroy(struct window **window) { struct priv *p = (struct priv *) *window; if (!p) return; pl_swapchain_destroy(&p->w.swapchain); #ifdef USE_VK pl_vulkan_destroy(&p->vk); if (p->surf) { PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR = (PFN_vkDestroySurfaceKHR) p->vk_inst->get_proc_addr(p->vk_inst->instance, "vkDestroySurfaceKHR"); vkDestroySurfaceKHR(p->vk_inst->instance, p->surf, NULL); } pl_vk_inst_destroy(&p->vk_inst); #endif #ifdef USE_GL pl_opengl_destroy(&p->gl); SDL_GL_DeleteContext(p->gl_ctx); #endif for (int i = 0; i < p->files_num; i++) SDL_free(p->files[i]); free(p->files); SDL_DestroyWindow(p->win); SDL_Quit(); free(p); *window = NULL; } static inline void handle_event(struct priv *p, SDL_Event *event) { switch (event->type) { case SDL_QUIT: p->w.window_lost = true; return; case SDL_WINDOWEVENT: if (event->window.windowID != SDL_GetWindowID(p->win)) return; if (event->window.event == SDL_WINDOWEVENT_SIZE_CHANGED) { int width = event->window.data1, height = event->window.data2; if (!pl_swapchain_resize(p->w.swapchain, &width, &height)) { fprintf(stderr, "libplacebo: Failed resizing swapchain? Exiting...\n"); p->w.window_lost = true; } } return; case SDL_MOUSEWHEEL: p->scroll_dx += event->wheel.x; p->scroll_dy += event->wheel.y; return; case SDL_DROPFILE: if (p->files_num == p->files_size) { size_t new_size = p->files_size ? p->files_size * 2 : 16; char **new_files = realloc(p->files, new_size * sizeof(char *)); if (!new_files) return; p->files = new_files; p->files_size = new_size; } p->files[p->files_num++] = event->drop.file; return; } } static void sdl_poll(struct window *window, bool block) { struct priv *p = (struct priv *) window; SDL_Event event; int ret; do { ret = block ? 
SDL_WaitEvent(&event) : SDL_PollEvent(&event); if (ret) handle_event(p, &event); // Only block on the first iteration block = false; } while (ret); } static void sdl_get_cursor(const struct window *window, int *x, int *y) { SDL_GetMouseState(x, y); } static bool sdl_get_button(const struct window *window, enum button btn) { static const uint32_t button_mask[] = { [BTN_LEFT] = SDL_BUTTON_LMASK, [BTN_RIGHT] = SDL_BUTTON_RMASK, [BTN_MIDDLE] = SDL_BUTTON_MMASK, }; return SDL_GetMouseState(NULL, NULL) & button_mask[btn]; } static bool sdl_get_key(const struct window *window, enum key key) { static const size_t key_map[] = { [KEY_ESC] = SDL_SCANCODE_ESCAPE, }; return SDL_GetKeyboardState(NULL)[key_map[key]]; } static void sdl_get_scroll(const struct window *window, float *dx, float *dy) { struct priv *p = (struct priv *) window; *dx = p->scroll_dx; *dy = p->scroll_dy; p->scroll_dx = p->scroll_dy = 0; } static char *sdl_get_file(const struct window *window) { struct priv *p = (struct priv *) window; if (p->file_seen) { assert(p->files_num); SDL_free(p->files[0]); memmove(&p->files[0], &p->files[1], --p->files_num * sizeof(char *)); p->file_seen = false; } if (!p->files_num) return NULL; p->file_seen = true; return p->files[0]; } const struct window_impl IMPL = { .name = IMPL_NAME, .create = sdl_create, .destroy = sdl_destroy, .poll = sdl_poll, .get_cursor = sdl_get_cursor, .get_button = sdl_get_button, .get_key = sdl_get_key, .get_scroll = sdl_get_scroll, .get_file = sdl_get_file, }; libplacebo-v4.192.1/gcovr.cfg000066400000000000000000000001261417677245700157760ustar00rootroot00000000000000exclude = .*/tests/.* exclude = .*/demos/.* exclude = .*_gen\.c$ sort-uncovered = yes libplacebo-v4.192.1/meson.build000066400000000000000000000076651417677245700163560ustar00rootroot00000000000000project('libplacebo', ['c', 'cpp'], license: 'LGPL2.1+', default_options: [ 'buildtype=debugoptimized', 'warning_level=2', 'c_std=c11', 'cpp_std=c++11', ], meson_version: '>=0.54', version: '@0@.@1@.@2@'.format( # Major version 4, # API version 157 + { # Incremental log of API changes (since v4.157.0) '158': 'add support for H.274 film grain', '159': 'remove fields deprecated for libplacebo v3', '160': 'add preprocessor macros for default params', '161': 'make H.274 film grain values indirect', '162': 'support rotation in pl_renderer', '163': 'add pl_frame_copy_stream_props', '164': 'support blending against tiles', '165': 'add pl_fmt.signature', '166': 'add pl_index_format', '167': 'expose pl_dispatch_reset_frame', '168': 'refactor pl_tex_transfer.stride_w/h into row/depth_pitch', '169': 'refactor pl_pass_params.target_dummy into target_format', '170': 'allow pl_queue_update on NULL', '171': 'make vulkan 1.2 the minimum version', '172': 'replace VkSemaphore by pl_vulkan_sem in pl_vulkan_hold/release', '173': 'remove VkAccessFlags from pl_vulkan_hold/release', '174': 'deprecate pl_vulkan_params.disable_events', '175': 'require timeline semaphores for all vulkan devices', '176': 'revert vulkan 1.2 requirement', '177': 'add debug_tag to pl_tex/buf_params', '178': 'add pl_gpu_limits.align_vertex_stride', '179': 'add pl_render_params.skip_caching_single_frame', '180': 'add pl_gpu_limits.max_variable_comps', '181': 'add pl_shader_set_alpha, change alpha handling of pl_shader_decode_color', '182': 'add pl_vulkan_get, pl_opengl_get, pl_d3d11_get', '183': 'relax pl_shared_mem.size > 0 requirement', '184': 'add pl_map_avframe/pl_unmap_avframe, deprecate pl_upload_avframe', '185': 'add PL_COLOR_SYSTEM_DOLBYVISION and 
reshaping', '186': 'add pl_d3d11_swapchain_params.flags', '187': 'add ', '188': 'refactor pl_color_map_params tone mapping settings', '189': 'refactor pl_color_space, merging it with pl_hdr_metadata', '190': 'add pl_color_map_params.gamut_mode, replacing gamut_clipping/warning', '191': 'add pl_map_dovi_metadata', '192': 'add pl_map_avframe_ex', }.keys().length(), # Fix version 1) ) # Version number version = meson.project_version() version_pretty = 'v' + version version_split = version.split('.') majorver = version_split[0] apiver = version_split[1] fixver = version_split[2] proj_name = meson.project_name() # Project build options build_opts = [ # Warnings '-Wundef', '-Wshadow', '-Wparentheses', '-Wpointer-arith', ] link_args = [] cc = meson.get_compiler('c') cxx = meson.get_compiler('cpp') c_opts = [ '-D_ISOC99_SOURCE', '-D_ISOC11_SOURCE', '-D_GNU_SOURCE', '-D_XOPEN_SOURCE=700', '-U__STRICT_ANSI__', '-fvisibility=hidden', '-Wmissing-prototypes', # Warnings to ignore '-Wno-sign-compare', '-Wno-unused-parameter', '-Wno-missing-field-initializers', '-Wno-type-limits', # Warnings to treat as errors '-Werror=implicit-function-declaration', ] # glslang needs c++11 cpp_opts = [ '-fvisibility=hidden', ] if cc.has_argument('-Wincompatible-pointer-types') c_opts += ['-Werror=incompatible-pointer-types'] endif # clang's version of -Wmissing-braces rejects the common {0} initializers if cc.get_id() == 'clang' c_opts += ['-Wno-missing-braces'] endif # don't leak library symbols if possible vflag = '-Wl,--exclude-libs=ALL' if cc.has_link_argument(vflag) link_args += [vflag] endif # OS specific build options if host_machine.system() == 'windows' build_opts += ['-D_WIN32_WINNT=0x0601'] endif add_project_arguments(build_opts + c_opts, language: 'c') add_project_arguments(build_opts + cpp_opts, language: 'cpp') add_project_link_arguments(link_args, language: 'c') subdir('src') if get_option('demos') subdir('demos') endif libplacebo-v4.192.1/meson_options.txt000066400000000000000000000027341417677245700176410ustar00rootroot00000000000000# Optional components option('vulkan', type: 'feature', value: 'auto', description: 'Vulkan-based renderer') option('vulkan-link', type: 'boolean', value: true, description: 'Link directly against vkGetInstanceProcAddr from libvulkan.so') option('vulkan-registry', type: 'string', value: '', description: 'Path to vulkan XML registry (for code generation)') option('opengl', type: 'feature', value: 'auto', description: 'OpenGL-based renderer') option('d3d11', type: 'feature', value: 'auto', description: 'Direct3D 11 based renderer') option('glslang', type: 'feature', value: 'auto', description: 'glslang SPIR-V compiler') option('shaderc', type: 'feature', value: 'auto', description: 'libshaderc SPIR-V compiler') option('lcms', type: 'feature', value: 'auto', description: 'LittleCMS 2 support') # Miscellaneous option('demos', type: 'boolean', value: true, description: 'Enable building (and installing) the demo programs') option('tests', type: 'boolean', value: false, description: 'Enable building the test cases') option('bench', type: 'boolean', value: false, description: 'Enable building benchmarks (`meson test benchmark`)') option('fuzz', type: 'boolean', value: false, description: 'Enable building fuzzer binaries (`CC=afl-cc`)') option('unwind', type: 'feature', value: 'auto', description: 'Enable linking against libunwind for printing stack traces caused by runtime errors') 
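# Example configuration (hypothetical invocation; enable only the backends you need):
#   meson setup build -Dvulkan=enabled -Dopengl=enabled -Dd3d11=disabled -Ddemos=true -Dtests=true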
libplacebo-v4.192.1/src/000077500000000000000000000000001417677245700147655ustar00rootroot00000000000000libplacebo-v4.192.1/src/colorspace.c000066400000000000000000001161701417677245700172710ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "common.h" bool pl_color_system_is_ycbcr_like(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_XYZ: return false; case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: case PL_COLOR_SYSTEM_DOLBYVISION: case PL_COLOR_SYSTEM_YCGCO: return true; case PL_COLOR_SYSTEM_COUNT: break; }; pl_unreachable(); } bool pl_color_system_is_linear(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_YCGCO: return true; case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: case PL_COLOR_SYSTEM_DOLBYVISION: case PL_COLOR_SYSTEM_XYZ: return false; case PL_COLOR_SYSTEM_COUNT: break; }; pl_unreachable(); } enum pl_color_system pl_color_system_guess_ycbcr(int width, int height) { if (width >= 1280 || height > 576) { // Typical HD content return PL_COLOR_SYSTEM_BT_709; } else { // Typical SD content return PL_COLOR_SYSTEM_BT_601; } } bool pl_bit_encoding_equal(const struct pl_bit_encoding *b1, const struct pl_bit_encoding *b2) { return b1->sample_depth == b2->sample_depth && b1->color_depth == b2->color_depth && b1->bit_shift == b2->bit_shift; } const struct pl_color_repr pl_color_repr_unknown = {0}; const struct pl_color_repr pl_color_repr_rgb = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, }; const struct pl_color_repr pl_color_repr_sdtv = { .sys = PL_COLOR_SYSTEM_BT_601, .levels = PL_COLOR_LEVELS_LIMITED, }; const struct pl_color_repr pl_color_repr_hdtv = { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_LIMITED, }; const struct pl_color_repr pl_color_repr_uhdtv = { .sys = PL_COLOR_SYSTEM_BT_2020_NC, .levels = PL_COLOR_LEVELS_LIMITED, }; const struct pl_color_repr pl_color_repr_jpeg = { .sys = PL_COLOR_SYSTEM_BT_601, .levels = PL_COLOR_LEVELS_FULL, }; bool pl_color_repr_equal(const struct pl_color_repr *c1, const struct pl_color_repr *c2) { return c1->sys == c2->sys && c1->levels == c2->levels && c1->alpha == c2->alpha && c1->dovi == c2->dovi && pl_bit_encoding_equal(&c1->bits, &c2->bits); } static struct pl_bit_encoding pl_bit_encoding_merge(const struct pl_bit_encoding *orig, const struct pl_bit_encoding *new) { return (struct pl_bit_encoding) { .sample_depth = PL_DEF(orig->sample_depth, new->sample_depth), .color_depth = 
PL_DEF(orig->color_depth, new->color_depth), .bit_shift = PL_DEF(orig->bit_shift, new->bit_shift), }; } void pl_color_repr_merge(struct pl_color_repr *orig, const struct pl_color_repr *new) { *orig = (struct pl_color_repr) { .sys = PL_DEF(orig->sys, new->sys), .levels = PL_DEF(orig->levels, new->levels), .alpha = PL_DEF(orig->alpha, new->alpha), .dovi = PL_DEF(orig->dovi, new->dovi), .bits = pl_bit_encoding_merge(&orig->bits, &new->bits), }; } enum pl_color_levels pl_color_levels_guess(const struct pl_color_repr *repr) { if (repr->sys == PL_COLOR_SYSTEM_DOLBYVISION) return PL_COLOR_LEVELS_FULL; if (repr->levels) return repr->levels; return pl_color_system_is_ycbcr_like(repr->sys) ? PL_COLOR_LEVELS_LIMITED : PL_COLOR_LEVELS_FULL; } float pl_color_repr_normalize(struct pl_color_repr *repr) { float scale = 1.0; struct pl_bit_encoding *bits = &repr->bits; if (bits->bit_shift) { scale /= (1LL << bits->bit_shift); bits->bit_shift = 0; } // If one of these is set but not the other, use the set one int tex_bits = PL_DEF(bits->sample_depth, 8); int col_bits = PL_DEF(bits->color_depth, tex_bits); tex_bits = PL_DEF(tex_bits, col_bits); if (pl_color_levels_guess(repr) == PL_COLOR_LEVELS_LIMITED) { // Limit range is always shifted directly scale *= (float) (1LL << tex_bits) / (1LL << col_bits); } else { // Full range always uses the full range available scale *= ((1LL << tex_bits) - 1.) / ((1LL << col_bits) - 1.); } bits->color_depth = bits->sample_depth; return scale; } bool pl_color_primaries_is_wide_gamut(enum pl_color_primaries prim) { switch (prim) { case PL_COLOR_PRIM_UNKNOWN: case PL_COLOR_PRIM_BT_601_525: case PL_COLOR_PRIM_BT_601_625: case PL_COLOR_PRIM_BT_709: case PL_COLOR_PRIM_BT_470M: case PL_COLOR_PRIM_EBU_3213: return false; case PL_COLOR_PRIM_BT_2020: case PL_COLOR_PRIM_APPLE: case PL_COLOR_PRIM_ADOBE: case PL_COLOR_PRIM_PRO_PHOTO: case PL_COLOR_PRIM_CIE_1931: case PL_COLOR_PRIM_DCI_P3: case PL_COLOR_PRIM_DISPLAY_P3: case PL_COLOR_PRIM_V_GAMUT: case PL_COLOR_PRIM_S_GAMUT: case PL_COLOR_PRIM_FILM_C: return true; case PL_COLOR_PRIM_COUNT: break; } pl_unreachable(); } enum pl_color_primaries pl_color_primaries_guess(int width, int height) { // HD content if (width >= 1280 || height > 576) return PL_COLOR_PRIM_BT_709; switch (height) { case 576: // Typical PAL content, including anamorphic/squared return PL_COLOR_PRIM_BT_601_625; case 480: // Typical NTSC content, including squared case 486: // NTSC Pro or anamorphic NTSC return PL_COLOR_PRIM_BT_601_525; default: // No good metric, just pick BT.709 to minimize damage return PL_COLOR_PRIM_BT_709; } } float pl_color_transfer_nominal_peak(enum pl_color_transfer trc) { switch (trc) { case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_BT_1886: case PL_COLOR_TRC_SRGB: case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_GAMMA18: case PL_COLOR_TRC_GAMMA20: case PL_COLOR_TRC_GAMMA22: case PL_COLOR_TRC_GAMMA24: case PL_COLOR_TRC_GAMMA26: case PL_COLOR_TRC_GAMMA28: case PL_COLOR_TRC_PRO_PHOTO: return 1.0; case PL_COLOR_TRC_PQ: return 10000.0 / PL_COLOR_SDR_WHITE; case PL_COLOR_TRC_HLG: return 12.0 / PL_COLOR_SDR_WHITE_HLG; case PL_COLOR_TRC_V_LOG: return 46.0855; case PL_COLOR_TRC_S_LOG1: return 6.52; case PL_COLOR_TRC_S_LOG2: return 9.212; case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); } bool pl_color_light_is_scene_referred(enum pl_color_light light) { switch (light) { case PL_COLOR_LIGHT_UNKNOWN: case PL_COLOR_LIGHT_DISPLAY: return false; case PL_COLOR_LIGHT_SCENE_HLG: case PL_COLOR_LIGHT_SCENE_709_1886: case PL_COLOR_LIGHT_SCENE_1_2: return true; case 
PL_COLOR_LIGHT_COUNT: break; } pl_unreachable(); } const struct pl_hdr_metadata pl_hdr_metadata_empty = {0}; const struct pl_hdr_metadata pl_hdr_metadata_hdr10 ={ .prim = { .red = {0.708, 0.292}, .green = {0.170, 0.797}, .blue = {0.131, 0.046}, .white = {0.31271, 0.32902}, }, .min_luma = 0, .max_luma = 10000, .max_cll = 10000, .max_fall = 0, // unknown }; bool pl_hdr_metadata_equal(const struct pl_hdr_metadata *a, const struct pl_hdr_metadata *b) { return pl_raw_primaries_equal(&a->prim, &b->prim) && a->min_luma == b->min_luma && a->max_luma == b->max_luma && a->max_cll == b->max_cll && a->max_fall == b->max_fall; } void pl_hdr_metadata_merge(struct pl_hdr_metadata *orig, const struct pl_hdr_metadata *update) { pl_raw_primaries_merge(&orig->prim, &update->prim); if (!orig->min_luma) orig->min_luma = update->min_luma; if (!orig->max_luma) orig->max_luma = update->max_luma; if (!orig->max_cll) orig->max_cll = update->max_cll; if (!orig->max_fall) orig->max_fall = update->max_fall; } const struct pl_color_space pl_color_space_unknown = {0}; const struct pl_color_space pl_color_space_srgb = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_SRGB, }; const struct pl_color_space pl_color_space_bt709 = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_BT_1886, }; const struct pl_color_space pl_color_space_hdr10 = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_PQ, }; const struct pl_color_space pl_color_space_bt2020_hlg = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_HLG, }; const struct pl_color_space pl_color_space_monitor = { .primaries = PL_COLOR_PRIM_BT_709, // sRGB primaries .transfer = PL_COLOR_TRC_UNKNOWN, // unknown SDR response }; bool pl_color_space_is_hdr(const struct pl_color_space *csp) { return csp->hdr.max_luma > PL_COLOR_SDR_WHITE || csp->sig_scale > 1 || pl_color_transfer_is_hdr(csp->transfer); } bool pl_color_space_is_black_scaled(const struct pl_color_space *csp) { switch (csp->transfer) { case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_SRGB: case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_GAMMA18: case PL_COLOR_TRC_GAMMA20: case PL_COLOR_TRC_GAMMA22: case PL_COLOR_TRC_GAMMA24: case PL_COLOR_TRC_GAMMA26: case PL_COLOR_TRC_GAMMA28: case PL_COLOR_TRC_PRO_PHOTO: case PL_COLOR_TRC_HLG: return true; case PL_COLOR_TRC_BT_1886: case PL_COLOR_TRC_PQ: case PL_COLOR_TRC_V_LOG: case PL_COLOR_TRC_S_LOG1: case PL_COLOR_TRC_S_LOG2: return false; case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); } void pl_color_space_merge(struct pl_color_space *orig, const struct pl_color_space *new) { if (!orig->primaries) orig->primaries = new->primaries; if (!orig->transfer) orig->transfer = new->transfer; pl_hdr_metadata_merge(&orig->hdr, &new->hdr); } bool pl_color_space_equal(const struct pl_color_space *c1, const struct pl_color_space *c2) { return c1->primaries == c2->primaries && c1->transfer == c2->transfer && pl_hdr_metadata_equal(&c1->hdr, &c2->hdr); } void pl_color_space_infer(struct pl_color_space *space) { if (!space->primaries) space->primaries = PL_COLOR_PRIM_BT_709; if (!space->transfer) space->transfer = PL_COLOR_TRC_BT_1886; // Backwards-compatibility with deprecated fields if (space->sig_peak) { space->hdr.max_luma = space->sig_peak * PL_COLOR_SDR_WHITE; space->sig_peak = 0; } if (space->sig_floor) { space->hdr.min_luma = space->sig_floor * PL_COLOR_SDR_WHITE; space->sig_floor = 0; } if (space->hdr.max_luma < space->hdr.min_luma) // sanity space->hdr.max_luma = space->hdr.min_luma = 0; if (space->hdr.max_luma < 1 || space->hdr.max_luma > 
10000) { space->hdr.max_luma = pl_color_transfer_nominal_peak(space->transfer) * PL_COLOR_SDR_WHITE; // Exception: For HLG content, we want to infer a value of 1000 cd/m², // a value which is considered the "reference" HLG display. if (space->transfer == PL_COLOR_TRC_HLG) space->hdr.max_luma = 1000; } if (space->hdr.min_luma <= 0 || space->hdr.min_luma > 100) { if (pl_color_transfer_is_hdr(space->transfer)) { space->hdr.min_luma = 0.0050f; // Typical HDR black } else { space->hdr.min_luma = space->hdr.max_luma / 1000; // Typical SDR contrast } } if (space->sig_scale && !pl_color_transfer_is_hdr(space->transfer)) { space->hdr.max_luma *= space->sig_scale; space->hdr.min_luma *= space->sig_scale; space->sig_scale = 0; } // Default the signal color space based on the nominal raw primaries pl_raw_primaries_merge(&space->hdr.prim, pl_raw_primaries_get(space->primaries)); } void pl_color_space_infer_ref(struct pl_color_space *space, const struct pl_color_space *refp) { struct pl_color_space ref = *refp; pl_color_space_infer(&ref); if (!space->primaries) { if (pl_color_primaries_is_wide_gamut(ref.primaries)) { space->primaries = PL_COLOR_PRIM_BT_709; } else { space->primaries = ref.primaries; } } if (!space->transfer) { if (pl_color_transfer_is_hdr(ref.transfer)) { space->transfer = PL_COLOR_TRC_BT_1886; } else if (ref.transfer == PL_COLOR_TRC_LINEAR) { space->transfer = PL_COLOR_TRC_GAMMA22; } else { space->transfer = ref.transfer; } } // Infer the remaining fields after making the above choices pl_color_space_infer(space); } const struct pl_color_adjustment pl_color_adjustment_neutral = { .brightness = 0.0, .contrast = 1.0, .saturation = 1.0, .hue = 0.0, .gamma = 1.0, .temperature = 0.0, }; void pl_chroma_location_offset(enum pl_chroma_location loc, float *x, float *y) { *x = *y = 0; // This is the majority of subsampled chroma content out there loc = PL_DEF(loc, PL_CHROMA_LEFT); switch (loc) { case PL_CHROMA_LEFT: case PL_CHROMA_TOP_LEFT: case PL_CHROMA_BOTTOM_LEFT: *x = -0.5; break; default: break; } switch (loc) { case PL_CHROMA_TOP_LEFT: case PL_CHROMA_TOP_CENTER: *y = -0.5; break; default: break; } switch (loc) { case PL_CHROMA_BOTTOM_LEFT: case PL_CHROMA_BOTTOM_CENTER: *y = 0.5; break; default: break; } } struct pl_cie_xy pl_white_from_temp(float temp) { temp = PL_CLAMP(temp, 2500, 25000); double ti = 1000.0 / temp, ti2 = ti * ti, ti3 = ti2 * ti, x; if (temp <= 7000) { x = -4.6070 * ti3 + 2.9678 * ti2 + 0.09911 * ti + 0.244063; } else { x = -2.0064 * ti3 + 1.9018 * ti2 + 0.24748 * ti + 0.237040; } return (struct pl_cie_xy) { .x = x, .y = -3 * (x*x) + 2.87 * x - 0.275, }; } bool pl_raw_primaries_equal(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b) { return pl_cie_xy_equal(&a->red, &b->red) && pl_cie_xy_equal(&a->green, &b->green) && pl_cie_xy_equal(&a->blue, &b->blue) && pl_cie_xy_equal(&a->white, &b->white); } void pl_raw_primaries_merge(struct pl_raw_primaries *orig, const struct pl_raw_primaries *update) { union { struct pl_raw_primaries prim; float raw[8]; } *pa = (void *) orig, *pb = (void *) update; pl_static_assert(sizeof(*pa) == sizeof(*orig)); for (int i = 0; i < PL_ARRAY_SIZE(pa->raw); i++) pa->raw[i] = PL_DEF(pa->raw[i], pb->raw[i]); } const struct pl_raw_primaries *pl_raw_primaries_get(enum pl_color_primaries prim) { /* Values from: ITU-R Recommendations BT.470-6, BT.601-7, BT.709-5, BT.2020-0 https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-6-199811-S!!PDF-E.pdf https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.601-7-201103-I!!PDF-E.pdf 
https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.709-5-200204-I!!PDF-E.pdf https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2020-0-201208-I!!PDF-E.pdf Other colorspaces from https://en.wikipedia.org/wiki/RGB_color_space#Specifications */ // CIE standard illuminant series #define CIE_D50 {0.34577, 0.35850} #define CIE_D65 {0.31271, 0.32902} #define CIE_DCI {0.31400, 0.35100} #define CIE_C {0.31006, 0.31616} #define CIE_E {1.0/3.0, 1.0/3.0} static const struct pl_raw_primaries primaries[] = { [PL_COLOR_PRIM_BT_470M] = { .red = {0.670, 0.330}, .green = {0.210, 0.710}, .blue = {0.140, 0.080}, .white = CIE_C, }, [PL_COLOR_PRIM_BT_601_525] = { .red = {0.630, 0.340}, .green = {0.310, 0.595}, .blue = {0.155, 0.070}, .white = CIE_D65, }, [PL_COLOR_PRIM_BT_601_625] = { .red = {0.640, 0.330}, .green = {0.290, 0.600}, .blue = {0.150, 0.060}, .white = CIE_D65, }, [PL_COLOR_PRIM_BT_709] = { .red = {0.640, 0.330}, .green = {0.300, 0.600}, .blue = {0.150, 0.060}, .white = CIE_D65, }, [PL_COLOR_PRIM_BT_2020] = { .red = {0.708, 0.292}, .green = {0.170, 0.797}, .blue = {0.131, 0.046}, .white = CIE_D65, }, [PL_COLOR_PRIM_APPLE] = { .red = {0.625, 0.340}, .green = {0.280, 0.595}, .blue = {0.115, 0.070}, .white = CIE_D65, }, [PL_COLOR_PRIM_ADOBE] = { .red = {0.640, 0.330}, .green = {0.210, 0.710}, .blue = {0.150, 0.060}, .white = CIE_D65, }, [PL_COLOR_PRIM_PRO_PHOTO] = { .red = {0.7347, 0.2653}, .green = {0.1596, 0.8404}, .blue = {0.0366, 0.0001}, .white = CIE_D50, }, [PL_COLOR_PRIM_CIE_1931] = { .red = {0.7347, 0.2653}, .green = {0.2738, 0.7174}, .blue = {0.1666, 0.0089}, .white = CIE_E, }, // From SMPTE RP 431-2 [PL_COLOR_PRIM_DCI_P3] = { .red = {0.680, 0.320}, .green = {0.265, 0.690}, .blue = {0.150, 0.060}, .white = CIE_DCI, }, [PL_COLOR_PRIM_DISPLAY_P3] = { .red = {0.680, 0.320}, .green = {0.265, 0.690}, .blue = {0.150, 0.060}, .white = CIE_D65, }, // From Panasonic VARICAM reference manual [PL_COLOR_PRIM_V_GAMUT] = { .red = {0.730, 0.280}, .green = {0.165, 0.840}, .blue = {0.100, -0.03}, .white = CIE_D65, }, // From Sony S-Log reference manual [PL_COLOR_PRIM_S_GAMUT] = { .red = {0.730, 0.280}, .green = {0.140, 0.855}, .blue = {0.100, -0.05}, .white = CIE_D65, }, // From FFmpeg source code [PL_COLOR_PRIM_FILM_C] = { .red = {0.681, 0.319}, .green = {0.243, 0.692}, .blue = {0.145, 0.049}, .white = CIE_C, }, [PL_COLOR_PRIM_EBU_3213] = { .red = {0.630, 0.340}, .green = {0.295, 0.605}, .blue = {0.155, 0.077}, .white = CIE_D65, }, }; // This is the default assumption if no colorspace information could // be determined, eg. for files which have no video channel. if (!prim) prim = PL_COLOR_PRIM_BT_709; pl_assert(prim < PL_ARRAY_SIZE(primaries)); return &primaries[prim]; } // Compute the RGB/XYZ matrix as described here: // http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html struct pl_matrix3x3 pl_get_rgb2xyz_matrix(const struct pl_raw_primaries *prim) { struct pl_matrix3x3 out = {{{0}}}; float S[3], X[4], Z[4]; // Convert from CIE xyY to XYZ. 
Note that Y=1 holds true for all primaries X[0] = prim->red.x / prim->red.y; X[1] = prim->green.x / prim->green.y; X[2] = prim->blue.x / prim->blue.y; X[3] = prim->white.x / prim->white.y; Z[0] = (1 - prim->red.x - prim->red.y) / prim->red.y; Z[1] = (1 - prim->green.x - prim->green.y) / prim->green.y; Z[2] = (1 - prim->blue.x - prim->blue.y) / prim->blue.y; Z[3] = (1 - prim->white.x - prim->white.y) / prim->white.y; // S = XYZ^-1 * W for (int i = 0; i < 3; i++) { out.m[0][i] = X[i]; out.m[1][i] = 1; out.m[2][i] = Z[i]; } pl_matrix3x3_invert(&out); for (int i = 0; i < 3; i++) S[i] = out.m[i][0] * X[3] + out.m[i][1] * 1 + out.m[i][2] * Z[3]; // M = [Sc * XYZc] for (int i = 0; i < 3; i++) { out.m[0][i] = S[i] * X[i]; out.m[1][i] = S[i] * 1; out.m[2][i] = S[i] * Z[i]; } return out; } struct pl_matrix3x3 pl_get_xyz2rgb_matrix(const struct pl_raw_primaries *prim) { // For simplicity, just invert the rgb2xyz matrix struct pl_matrix3x3 out = pl_get_rgb2xyz_matrix(prim); pl_matrix3x3_invert(&out); return out; } // LMS<-XYZ revised matrix from CIECAM97, based on a linear transform and // normalized for equal energy on monochrome inputs static const struct pl_matrix3x3 m_cat97 = {{ { 0.8562, 0.3372, -0.1934 }, { -0.8360, 1.8327, 0.0033 }, { 0.0357, -0.0469, 1.0112 }, }}; // M := M * XYZd<-XYZs static void apply_chromatic_adaptation(struct pl_cie_xy src, struct pl_cie_xy dest, struct pl_matrix3x3 *mat) { // If the white points are nearly identical, this is a wasteful identity // operation. if (fabs(src.x - dest.x) < 1e-6 && fabs(src.y - dest.y) < 1e-6) return; // XYZd<-XYZs = Ma^-1 * (I*[Cd/Cs]) * Ma // http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html // For Ma, we use the CIECAM97 revised (linear) matrix float C[3][2]; for (int i = 0; i < 3; i++) { // source cone C[i][0] = m_cat97.m[i][0] * pl_cie_X(src) + m_cat97.m[i][1] * 1 + m_cat97.m[i][2] * pl_cie_Z(src); // dest cone C[i][1] = m_cat97.m[i][0] * pl_cie_X(dest) + m_cat97.m[i][1] * 1 + m_cat97.m[i][2] * pl_cie_Z(dest); } // tmp := I * [Cd/Cs] * Ma struct pl_matrix3x3 tmp = {0}; for (int i = 0; i < 3; i++) tmp.m[i][i] = C[i][1] / C[i][0]; pl_matrix3x3_mul(&tmp, &m_cat97); // M := M * Ma^-1 * tmp struct pl_matrix3x3 ma_inv = m_cat97; pl_matrix3x3_invert(&ma_inv); pl_matrix3x3_mul(mat, &ma_inv); pl_matrix3x3_mul(mat, &tmp); } struct pl_matrix3x3 pl_get_adaptation_matrix(struct pl_cie_xy src, struct pl_cie_xy dst) { // Use BT.709 primaries (with chosen white point) as an XYZ reference struct pl_raw_primaries csp = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_709); csp.white = src; struct pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(&csp); struct pl_matrix3x3 xyz2rgb = rgb2xyz; pl_matrix3x3_invert(&xyz2rgb); apply_chromatic_adaptation(src, dst, &xyz2rgb); pl_matrix3x3_mul(&xyz2rgb, &rgb2xyz); return xyz2rgb; } const struct pl_cone_params pl_vision_normal = {PL_CONE_NONE, 1.0}; const struct pl_cone_params pl_vision_protanomaly = {PL_CONE_L, 0.5}; const struct pl_cone_params pl_vision_protanopia = {PL_CONE_L, 0.0}; const struct pl_cone_params pl_vision_deuteranomaly = {PL_CONE_M, 0.5}; const struct pl_cone_params pl_vision_deuteranopia = {PL_CONE_M, 0.0}; const struct pl_cone_params pl_vision_tritanomaly = {PL_CONE_S, 0.5}; const struct pl_cone_params pl_vision_tritanopia = {PL_CONE_S, 0.0}; const struct pl_cone_params pl_vision_monochromacy = {PL_CONE_LM, 0.0}; const struct pl_cone_params pl_vision_achromatopsia = {PL_CONE_LMS, 0.0}; struct pl_matrix3x3 pl_get_cone_matrix(const struct pl_cone_params *params, const struct pl_raw_primaries *prim) { 
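// Helper for simulating color vision deficiencies: params->strength = 1.0
// leaves colors untouched, while 0.0 corresponds to complete loss of the
// selected cone(s), matching the pl_vision_* presets defined above.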
// LMS<-RGB := LMS<-XYZ * XYZ<-RGB struct pl_matrix3x3 rgb2lms = m_cat97; struct pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(prim); pl_matrix3x3_mul(&rgb2lms, &rgb2xyz); // LMS versions of the two opposing primaries, plus neutral float lms_r[3] = {1.0, 0.0, 0.0}, lms_b[3] = {0.0, 0.0, 1.0}, lms_w[3] = {1.0, 1.0, 1.0}; pl_matrix3x3_apply(&rgb2lms, lms_r); pl_matrix3x3_apply(&rgb2lms, lms_b); pl_matrix3x3_apply(&rgb2lms, lms_w); float a, b, c = params->strength; struct pl_matrix3x3 distort; switch (params->cones) { case PL_CONE_NONE: return pl_matrix3x3_identity; case PL_CONE_L: // Solve to preserve neutral and blue a = (lms_b[0] - lms_b[2] * lms_w[0] / lms_w[2]) / (lms_b[1] - lms_b[2] * lms_w[1] / lms_w[2]); b = (lms_b[0] - lms_b[1] * lms_w[0] / lms_w[1]) / (lms_b[2] - lms_b[1] * lms_w[2] / lms_w[1]); assert(fabs(a * lms_w[1] + b * lms_w[2] - lms_w[0]) < 1e-6); distort = (struct pl_matrix3x3) {{ { c, (1.0 - c) * a, (1.0 - c) * b}, { 0.0, 1.0, 0.0}, { 0.0, 0.0, 1.0}, }}; break; case PL_CONE_M: // Solve to preserve neutral and blue a = (lms_b[1] - lms_b[2] * lms_w[1] / lms_w[2]) / (lms_b[0] - lms_b[2] * lms_w[0] / lms_w[2]); b = (lms_b[1] - lms_b[0] * lms_w[1] / lms_w[0]) / (lms_b[2] - lms_b[0] * lms_w[2] / lms_w[0]); assert(fabs(a * lms_w[0] + b * lms_w[2] - lms_w[1]) < 1e-6); distort = (struct pl_matrix3x3) {{ { 1.0, 0.0, 0.0}, {(1.0 - c) * a, c, (1.0 - c) * b}, { 0.0, 0.0, 1.0}, }}; break; case PL_CONE_S: // Solve to preserve neutral and red a = (lms_r[2] - lms_r[1] * lms_w[2] / lms_w[1]) / (lms_r[0] - lms_r[1] * lms_w[0] / lms_w[1]); b = (lms_r[2] - lms_r[0] * lms_w[2] / lms_w[0]) / (lms_r[1] - lms_r[0] * lms_w[1] / lms_w[0]); assert(fabs(a * lms_w[0] + b * lms_w[1] - lms_w[2]) < 1e-6); distort = (struct pl_matrix3x3) {{ { 1.0, 0.0, 0.0}, { 0.0, 1.0, 0.0}, {(1.0 - c) * a, (1.0 - c) * b, c}, }}; break; case PL_CONE_LM: // Solve to preserve neutral a = lms_w[0] / lms_w[2]; b = lms_w[1] / lms_w[2]; distort = (struct pl_matrix3x3) {{ { c, 0.0, (1.0 - c) * a}, { 0.0, c, (1.0 - c) * b}, { 0.0, 0.0, 1.0}, }}; break; case PL_CONE_MS: // Solve to preserve neutral a = lms_w[1] / lms_w[0]; b = lms_w[2] / lms_w[0]; distort = (struct pl_matrix3x3) {{ { 1.0, 0.0, 0.0}, {(1.0 - c) * a, c, 0.0}, {(1.0 - c) * b, 0.0, c}, }}; break; case PL_CONE_LS: // Solve to preserve neutral a = lms_w[0] / lms_w[1]; b = lms_w[2] / lms_w[1]; distort = (struct pl_matrix3x3) {{ { c, (1.0 - c) * a, 0.0}, { 0.0, 1.0, 0.0}, { 0.0, (1.0 - c) * b, c}, }}; break; case PL_CONE_LMS: { // Rod cells only, which can be modelled somewhat as a combination of // L and M cones. Either way, this is pushing the limits of the our // color model, so this is only a rough approximation. const float w[3] = {0.3605, 0.6415, -0.002}; assert(fabs(w[0] + w[1] + w[2] - 1.0) < 1e-6); for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { distort.m[i][j] = (1.0 - c) * w[j] * lms_w[i] / lms_w[j]; if (i == j) distort.m[i][j] += c; } } break; } default: pl_unreachable(); } // out := RGB<-LMS * distort * LMS<-RGB struct pl_matrix3x3 out = rgb2lms; pl_matrix3x3_invert(&out); pl_matrix3x3_mul(&out, &distort); pl_matrix3x3_mul(&out, &rgb2lms); return out; } struct pl_matrix3x3 pl_get_color_mapping_matrix(const struct pl_raw_primaries *src, const struct pl_raw_primaries *dst, enum pl_rendering_intent intent) { // In saturation mapping, we don't care about accuracy and just want // primaries to map to primaries, making this an identity transformation. 
if (intent == PL_INTENT_SATURATION) return pl_matrix3x3_identity; // RGBd<-RGBs = RGBd<-XYZd * XYZd<-XYZs * XYZs<-RGBs // Equations from: http://www.brucelindbloom.com/index.html?Math.html // Note: Perceptual is treated like relative colorimetric. There's no // definition for perceptual other than "make it look good". // RGBd<-XYZd matrix struct pl_matrix3x3 xyz2rgb_d = pl_get_xyz2rgb_matrix(dst); // Chromatic adaptation, except in absolute colorimetric intent if (intent != PL_INTENT_ABSOLUTE_COLORIMETRIC) apply_chromatic_adaptation(src->white, dst->white, &xyz2rgb_d); // XYZs<-RGBs struct pl_matrix3x3 rgb2xyz_s = pl_get_rgb2xyz_matrix(src); pl_matrix3x3_mul(&xyz2rgb_d, &rgb2xyz_s); return xyz2rgb_d; } // Test the sign of 'p' relative to the line 'ab' (barycentric coordinates) static float test_point_line(const struct pl_cie_xy p, const struct pl_cie_xy a, const struct pl_cie_xy b) { return (p.x - b.x) * (a.y - b.y) - (a.x - b.x) * (p.y - b.y); } // Test if a point is entirely inside a gamut static float test_point_gamut(struct pl_cie_xy point, const struct pl_raw_primaries *prim) { float d1 = test_point_line(point, prim->red, prim->green), d2 = test_point_line(point, prim->green, prim->blue), d3 = test_point_line(point, prim->blue, prim->red); bool has_neg = d1 < 0 || d2 < 0 || d3 < 0, has_pos = d1 > 0 || d2 > 0 || d3 > 0; return !(has_neg && has_pos); } bool pl_primaries_superset(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b) { return test_point_gamut(b->red, a) && test_point_gamut(b->green, a) && test_point_gamut(b->blue, a); } /* Fill in the Y, U, V vectors of a yuv-to-rgb conversion matrix * based on the given luma weights of the R, G and B components (lr, lg, lb). * lr+lg+lb is assumed to equal 1. * This function is meant for colorspaces satisfying the following * conditions (which are true for common YUV colorspaces): * - The mapping from input [Y, U, V] to output [R, G, B] is linear. * - Y is the vector [1, 1, 1]. (meaning input Y component maps to 1R+1G+1B) * - U maps to a value with zero R and positive B ([0, x, y], y > 0; * i.e. blue and green only). * - V maps to a value with zero B and positive R ([x, y, 0], x > 0; * i.e. red and green only). * - U and V are orthogonal to the luma vector [lr, lg, lb]. * - The magnitudes of the vectors U and V are the minimal ones for which * the image of the set Y=[0...1],U=[-0.5...0.5],V=[-0.5...0.5] under the * conversion function will cover the set R=[0...1],G=[0...1],B=[0...1] * (the resulting matrix can be converted for other input/output ranges * outside this function). * Under these conditions the given parameters lr, lg, lb uniquely * determine the mapping of Y, U, V to R, G, B. */ static struct pl_matrix3x3 luma_coeffs(float lr, float lg, float lb) { pl_assert(fabs(lr+lg+lb - 1) < 1e-6); return (struct pl_matrix3x3) {{ {1, 0, 2 * (1-lr) }, {1, -2 * (1-lb) * lb/lg, -2 * (1-lr) * lr/lg }, {1, 2 * (1-lb), 0 }, }}; } // Applies hue and saturation controls to a YCbCr->RGB matrix static inline void apply_hue_sat(struct pl_matrix3x3 *m, const struct pl_color_adjustment *params) { // Hue is equivalent to rotating input [U, V] subvector around the origin. // Saturation scales [U, V]. 
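// Concretely, each row's (U, V) coefficients are rotated by `hue` and scaled
// by `saturation`: m_u' = s*(cos(h)*m_u - sin(h)*m_v), m_v' = s*(sin(h)*m_u + cos(h)*m_v),
// which is exactly what the loop below computes via huecos/huesin.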
float huecos = params->saturation * cos(params->hue); float huesin = params->saturation * sin(params->hue); for (int i = 0; i < 3; i++) { float u = m->m[i][1], v = m->m[i][2]; m->m[i][1] = huecos * u - huesin * v; m->m[i][2] = huesin * u + huecos * v; } } struct pl_transform3x3 pl_color_repr_decode(struct pl_color_repr *repr, const struct pl_color_adjustment *params) { params = PL_DEF(params, &pl_color_adjustment_neutral); struct pl_matrix3x3 m; switch (repr->sys) { case PL_COLOR_SYSTEM_BT_709: m = luma_coeffs(0.2126, 0.7152, 0.0722); break; case PL_COLOR_SYSTEM_BT_601: m = luma_coeffs(0.2990, 0.5870, 0.1140); break; case PL_COLOR_SYSTEM_SMPTE_240M: m = luma_coeffs(0.2122, 0.7013, 0.0865); break; case PL_COLOR_SYSTEM_BT_2020_NC: m = luma_coeffs(0.2627, 0.6780, 0.0593); break; case PL_COLOR_SYSTEM_BT_2020_C: // Note: This outputs into the [-0.5,0.5] range for chroma information. m = (struct pl_matrix3x3) {{ {0, 0, 1}, {1, 0, 0}, {0, 1, 0}, }}; break; case PL_COLOR_SYSTEM_BT_2100_PQ: { // Reversed from the matrix in the spec, hard-coded for efficiency // and precision reasons. Exact values truncated from ITU-T H-series // Supplement 18. static const float lm_t = 0.008609, lm_p = 0.111029625; m = (struct pl_matrix3x3) {{ {1.0, lm_t, lm_p}, {1.0, -lm_t, -lm_p}, {1.0, 0.560031, -0.320627}, }}; break; } case PL_COLOR_SYSTEM_BT_2100_HLG: { // Similar to BT.2100 PQ, exact values truncated from WolframAlpha static const float lm_t = 0.01571858011, lm_p = 0.2095810681; m = (struct pl_matrix3x3) {{ {1.0, lm_t, lm_p}, {1.0, -lm_t, -lm_p}, {1.0, 1.02127108, -0.605274491}, }}; break; } case PL_COLOR_SYSTEM_DOLBYVISION: m = repr->dovi->nonlinear; break; case PL_COLOR_SYSTEM_YCGCO: m = (struct pl_matrix3x3) {{ {1, -1, 1}, {1, 1, 0}, {1, -1, -1}, }}; break; case PL_COLOR_SYSTEM_UNKNOWN: // fall through case PL_COLOR_SYSTEM_RGB: m = pl_matrix3x3_identity; break; case PL_COLOR_SYSTEM_XYZ: { // For lack of anything saner to do, just assume the caller wants // BT.709 primaries, which is a reasonable assumption. m = pl_get_xyz2rgb_matrix(pl_raw_primaries_get(PL_COLOR_PRIM_BT_709)); break; } case PL_COLOR_SYSTEM_COUNT: pl_unreachable(); } // Apply hue and saturation in the correct way depending on the colorspace. if (pl_color_system_is_ycbcr_like(repr->sys)) { apply_hue_sat(&m, params); } else if (params->saturation != 1.0 || params->hue != 0.0) { // Arbitrarily simulate hue shifts using the BT.709 YCbCr model struct pl_matrix3x3 yuv2rgb = luma_coeffs(0.2126, 0.7152, 0.0722); struct pl_matrix3x3 rgb2yuv = yuv2rgb; pl_matrix3x3_invert(&rgb2yuv); apply_hue_sat(&yuv2rgb, params); // M := RGB<-YUV * YUV<-RGB * M pl_matrix3x3_rmul(&rgb2yuv, &m); pl_matrix3x3_rmul(&yuv2rgb, &m); } // Apply color temperature adaptation, relative to BT.709 primaries if (params->temperature) { struct pl_cie_xy src = pl_white_from_temp(6500); struct pl_cie_xy dst = pl_white_from_temp(6500 + 3500 * params->temperature); struct pl_matrix3x3 adapt = pl_get_adaptation_matrix(src, dst); pl_matrix3x3_rmul(&adapt, &m); } struct pl_transform3x3 out = { .mat = m }; int bit_depth = PL_DEF(repr->bits.sample_depth, PL_DEF(repr->bits.color_depth, 8)); double ymax, ymin, cmax, cmid; double scale = (1LL << bit_depth) / ((1LL << bit_depth) - 1.0); switch (pl_color_levels_guess(repr)) { case PL_COLOR_LEVELS_LIMITED: { ymax = 235 / 256. * scale; ymin = 16 / 256. * scale; cmax = 240 / 256. * scale; cmid = 128 / 256. * scale; break; } case PL_COLOR_LEVELS_FULL: // Note: For full-range YUV, there are multiple, subtly inconsistent // standards. 
So just pick the sanest implementation, which is to // assume MAX_INT == 1.0. ymax = 1.0; ymin = 0.0; cmax = 1.0; cmid = 128 / 256. * scale; // *not* exactly 0.5 break; default: pl_unreachable(); } double ymul = 1.0 / (ymax - ymin); double cmul = 0.5 / (cmax - cmid); double mul[3] = { ymul, ymul, ymul }; double black[3] = { ymin, ymin, ymin }; if (repr->sys == PL_COLOR_SYSTEM_DOLBYVISION) { // The RPU matrix already includes levels normalization, but in this // case we also have to respect the signalled color offsets for (int i = 0; i < 3; i++) { mul[i] = 1.0; black[i] = repr->dovi->nonlinear_offset[i] * scale; } } else if (pl_color_system_is_ycbcr_like(repr->sys)) { mul[1] = mul[2] = cmul; black[1] = black[2] = cmid; } // Contrast scales the output value range (gain) // Brightness scales the constant output bias (black lift/boost) for (int i = 0; i < 3; i++) { mul[i] *= params->contrast; out.c[i] += params->brightness; } // Multiply in the texture multiplier and adjust `c` so that black[j] keeps // on mapping to RGB=0 (black to black) for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { out.mat.m[i][j] *= mul[j]; out.c[i] -= out.mat.m[i][j] * black[j]; } } // Finally, multiply in the scaling factor required to get the color up to // the correct representation. pl_matrix3x3_scale(&out.mat, pl_color_repr_normalize(repr)); // Update the metadata to reflect the change. repr->sys = PL_COLOR_SYSTEM_RGB; repr->levels = PL_COLOR_LEVELS_FULL; return out; } bool pl_icc_profile_equal(const struct pl_icc_profile *p1, const struct pl_icc_profile *p2) { if (p1->len != p2->len) return false; // Ignore signatures on length-0 profiles, as a special case return !p1->len || p1->signature == p2->signature; } void pl_icc_profile_compute_signature(struct pl_icc_profile *profile) { // In theory, we could get this value from the profile header itself if // lcms is available, but I'm not sure if it's even worth the trouble. Just // hard-code this to a siphash64(), which is decently fast anyway. profile->signature = pl_str_hash((pl_str) { .buf = (uint8_t *) profile->data, .len = profile->len }); } libplacebo-v4.192.1/src/common.c000066400000000000000000000257311417677245700164310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "common.h" #include "version.h" int pl_fix_ver(void) { return BUILD_FIX_VER; } const char *pl_version(void) { return BUILD_VERSION; } void pl_rect2d_normalize(struct pl_rect2d *rc) { *rc = (struct pl_rect2d) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), }; } void pl_rect3d_normalize(struct pl_rect3d *rc) { *rc = (struct pl_rect3d) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), .z0 = PL_MIN(rc->z0, rc->z1), .z1 = PL_MAX(rc->z0, rc->z1), }; } void pl_rect2df_normalize(struct pl_rect2df *rc) { *rc = (struct pl_rect2df) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), }; } void pl_rect3df_normalize(struct pl_rect3df *rc) { *rc = (struct pl_rect3df) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), .z0 = PL_MIN(rc->z0, rc->z1), .z1 = PL_MAX(rc->z0, rc->z1), }; } struct pl_rect2d pl_rect2df_round(const struct pl_rect2df *rc) { return (struct pl_rect2d) { .x0 = roundf(rc->x0), .x1 = roundf(rc->x1), .y0 = roundf(rc->y0), .y1 = roundf(rc->y1), }; } struct pl_rect3d pl_rect3df_round(const struct pl_rect3df *rc) { return (struct pl_rect3d) { .x0 = roundf(rc->x0), .x1 = roundf(rc->x1), .y0 = roundf(rc->y0), .y1 = roundf(rc->y1), .z0 = roundf(rc->z0), .z1 = roundf(rc->z1), }; } const struct pl_matrix3x3 pl_matrix3x3_identity = {{ { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, }}; void pl_matrix3x3_apply(const struct pl_matrix3x3 *mat, float vec[3]) { float x = vec[0], y = vec[1], z = vec[2]; for (int i = 0; i < 3; i++) vec[i] = mat->m[i][0] * x + mat->m[i][1] * y + mat->m[i][2] * z; } void pl_matrix3x3_apply_rc(const struct pl_matrix3x3 *mat, struct pl_rect3df *rc) { float x0 = rc->x0, x1 = rc->x1, y0 = rc->y0, y1 = rc->y1, z0 = rc->z0, z1 = rc->z1; rc->x0 = mat->m[0][0] * x0 + mat->m[0][1] * y0 + mat->m[0][2] * z0; rc->y0 = mat->m[1][0] * x0 + mat->m[1][1] * y0 + mat->m[1][2] * z0; rc->z0 = mat->m[2][0] * x0 + mat->m[2][1] * y0 + mat->m[2][2] * z0; rc->x1 = mat->m[0][0] * x1 + mat->m[0][1] * y1 + mat->m[0][2] * z1; rc->y1 = mat->m[1][0] * x1 + mat->m[1][1] * y1 + mat->m[1][2] * z1; rc->z1 = mat->m[2][0] * x1 + mat->m[2][1] * y1 + mat->m[2][2] * z1; } void pl_matrix3x3_scale(struct pl_matrix3x3 *mat, float scale) { for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) mat->m[i][j] *= scale; } } void pl_matrix3x3_invert(struct pl_matrix3x3 *mat) { float m00 = mat->m[0][0], m01 = mat->m[0][1], m02 = mat->m[0][2], m10 = mat->m[1][0], m11 = mat->m[1][1], m12 = mat->m[1][2], m20 = mat->m[2][0], m21 = mat->m[2][1], m22 = mat->m[2][2]; // calculate the adjoint mat->m[0][0] = (m11 * m22 - m21 * m12); mat->m[0][1] = -(m01 * m22 - m21 * m02); mat->m[0][2] = (m01 * m12 - m11 * m02); mat->m[1][0] = -(m10 * m22 - m20 * m12); mat->m[1][1] = (m00 * m22 - m20 * m02); mat->m[1][2] = -(m00 * m12 - m10 * m02); mat->m[2][0] = (m10 * m21 - m20 * m11); mat->m[2][1] = -(m00 * m21 - m20 * m01); mat->m[2][2] = (m00 * m11 - m10 * m01); // calculate the determinant (as inverse == 1/det * adjoint, // adjoint * m == identity * det, so this calculates the det) float det = m00 * mat->m[0][0] + m10 * mat->m[0][1] + m20 * mat->m[0][2]; det = 1.0f / det; for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) mat->m[i][j] *= det; } } void pl_matrix3x3_mul(struct pl_matrix3x3 *a, const struct pl_matrix3x3 *b) { float a00 = 
a->m[0][0], a01 = a->m[0][1], a02 = a->m[0][2], a10 = a->m[1][0], a11 = a->m[1][1], a12 = a->m[1][2], a20 = a->m[2][0], a21 = a->m[2][1], a22 = a->m[2][2]; for (int i = 0; i < 3; i++) { a->m[0][i] = a00 * b->m[0][i] + a01 * b->m[1][i] + a02 * b->m[2][i]; a->m[1][i] = a10 * b->m[0][i] + a11 * b->m[1][i] + a12 * b->m[2][i]; a->m[2][i] = a20 * b->m[0][i] + a21 * b->m[1][i] + a22 * b->m[2][i]; } } void pl_matrix3x3_rmul(const struct pl_matrix3x3 *a, struct pl_matrix3x3 *b) { struct pl_matrix3x3 m = *a; pl_matrix3x3_mul(&m, b); *b = m; } const struct pl_transform3x3 pl_transform3x3_identity = { .mat = {{ { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, }}, }; void pl_transform3x3_apply(const struct pl_transform3x3 *t, float vec[3]) { pl_matrix3x3_apply(&t->mat, vec); for (int i = 0; i < 3; i++) vec[i] += t->c[i]; } void pl_transform3x3_apply_rc(const struct pl_transform3x3 *t, struct pl_rect3df *rc) { pl_matrix3x3_apply_rc(&t->mat, rc); rc->x0 += t->c[0]; rc->x1 += t->c[0]; rc->y0 += t->c[1]; rc->y1 += t->c[1]; rc->z0 += t->c[2]; rc->z1 += t->c[2]; } void pl_transform3x3_scale(struct pl_transform3x3 *t, float scale) { pl_matrix3x3_scale(&t->mat, scale); for (int i = 0; i < 3; i++) t->c[i] *= scale; } // based on DarkPlaces engine (relicensed from GPL to LGPL) void pl_transform3x3_invert(struct pl_transform3x3 *t) { pl_matrix3x3_invert(&t->mat); float m00 = t->mat.m[0][0], m01 = t->mat.m[0][1], m02 = t->mat.m[0][2], m10 = t->mat.m[1][0], m11 = t->mat.m[1][1], m12 = t->mat.m[1][2], m20 = t->mat.m[2][0], m21 = t->mat.m[2][1], m22 = t->mat.m[2][2]; // fix the constant coefficient // rgb = M * yuv + C // M^-1 * rgb = yuv + M^-1 * C // yuv = M^-1 * rgb - M^-1 * C // ^^^^^^^^^^ float c0 = t->c[0], c1 = t->c[1], c2 = t->c[2]; t->c[0] = -(m00 * c0 + m01 * c1 + m02 * c2); t->c[1] = -(m10 * c0 + m11 * c1 + m12 * c2); t->c[2] = -(m20 * c0 + m21 * c1 + m22 * c2); } const struct pl_matrix2x2 pl_matrix2x2_identity = {{ { 1, 0 }, { 0, 1 }, }}; void pl_matrix2x2_apply(const struct pl_matrix2x2 *mat, float vec[2]) { float x = vec[0], y = vec[1]; for (int i = 0; i < 2; i++) vec[i] = mat->m[i][0] * x + mat->m[i][1] * y; } void pl_matrix2x2_apply_rc(const struct pl_matrix2x2 *mat, struct pl_rect2df *rc) { float x0 = rc->x0, x1 = rc->x1, y0 = rc->y0, y1 = rc->y1; rc->x0 = mat->m[0][0] * x0 + mat->m[0][1] * y0; rc->y0 = mat->m[1][0] * x0 + mat->m[1][1] * y0; rc->x1 = mat->m[0][0] * x1 + mat->m[0][1] * y1; rc->y1 = mat->m[1][0] * x1 + mat->m[1][1] * y1; } const struct pl_transform2x2 pl_transform2x2_identity = { .mat = {{ { 1, 0 }, { 0, 1 }, }}, }; void pl_transform2x2_apply(const struct pl_transform2x2 *t, float vec[2]) { pl_matrix2x2_apply(&t->mat, vec); for (int i = 0; i < 2; i++) vec[i] += t->c[i]; } void pl_transform2x2_apply_rc(const struct pl_transform2x2 *t, struct pl_rect2df *rc) { pl_matrix2x2_apply_rc(&t->mat, rc); rc->x0 += t->c[0]; rc->x1 += t->c[0]; rc->y0 += t->c[1]; rc->y1 += t->c[1]; } float pl_rect2df_aspect(const struct pl_rect2df *rc) { float w = fabs(pl_rect_w(*rc)), h = fabs(pl_rect_h(*rc)); return h ? 
(w / h) : 0.0; } void pl_rect2df_aspect_set(struct pl_rect2df *rc, float aspect, float panscan) { pl_assert(aspect >= 0); float orig_aspect = pl_rect2df_aspect(rc); if (!aspect || !orig_aspect) return; float scale_x, scale_y; if (aspect > orig_aspect) { // New aspect is wider than the original, so we need to either grow in // scale_x (panscan=1) or shrink in scale_y (panscan=0) scale_x = powf(aspect / orig_aspect, panscan); scale_y = powf(aspect / orig_aspect, panscan - 1.0); } else if (aspect < orig_aspect) { // New aspect is taller, so either grow in scale_y (panscan=1) or // shrink in scale_x (panscan=0) scale_x = powf(orig_aspect / aspect, panscan - 1.0); scale_y = powf(orig_aspect / aspect, panscan); } else { return; // No change in aspect } pl_rect2df_stretch(rc, scale_x, scale_y); } void pl_rect2df_aspect_fit(struct pl_rect2df *rc, const struct pl_rect2df *src, float panscan) { float orig_w = fabs(pl_rect_w(*rc)), orig_h = fabs(pl_rect_h(*rc)); if (!orig_w || !orig_h) return; // If either one of these is larger than 1, then we need to shrink to fit, // otherwise we can just directly stretch the rect. float scale_x = fabs(pl_rect_w(*src)) / orig_w, scale_y = fabs(pl_rect_h(*src)) / orig_h; if (scale_x > 1.0 || scale_y > 1.0) { pl_rect2df_aspect_copy(rc, src, panscan); } else { pl_rect2df_stretch(rc, scale_x, scale_y); } } void pl_rect2df_stretch(struct pl_rect2df *rc, float stretch_x, float stretch_y) { float midx = (rc->x0 + rc->x1) / 2.0, midy = (rc->y0 + rc->y1) / 2.0; rc->x0 = rc->x0 * stretch_x + midx * (1.0 - stretch_x); rc->x1 = rc->x1 * stretch_x + midx * (1.0 - stretch_x); rc->y0 = rc->y0 * stretch_y + midy * (1.0 - stretch_y); rc->y1 = rc->y1 * stretch_y + midy * (1.0 - stretch_y); } void pl_rect2df_offset(struct pl_rect2df *rc, float offset_x, float offset_y) { if (rc->x1 < rc->x0) offset_x = -offset_x; if (rc->y1 < rc->y0) offset_y = -offset_y; rc->x0 += offset_x; rc->x1 += offset_x; rc->y0 += offset_y; rc->y1 += offset_y; } void pl_rect2df_rotate(struct pl_rect2df *rc, pl_rotation rot) { if (!(rot = pl_rotation_normalize(rot))) return; float x0 = rc->x0, y0 = rc->y0, x1 = rc->x1, y1 = rc->y1; if (rot >= PL_ROTATION_180) { rot -= PL_ROTATION_180; PL_SWAP(x0, x1); PL_SWAP(y0, y1); } switch (rot) { case PL_ROTATION_0: *rc = (struct pl_rect2df) { .x0 = x0, .y0 = y0, .x1 = x1, .y1 = y1, }; return; case PL_ROTATION_90: *rc = (struct pl_rect2df) { .x0 = y1, .y0 = x0, .x1 = y0, .y1 = x1, }; return; default: pl_unreachable(); } } libplacebo-v4.192.1/src/common.h000066400000000000000000000140111417677245700164230ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #define __STDC_FORMAT_MACROS #include #include #include #include #include #include #if defined(__MINGW32__) && !defined(__clang__) #define PL_PRINTF(fmt, va) __attribute__ ((format(gnu_printf, fmt, va))) #elif defined(__GNUC__) #define PL_PRINTF(fmt, va) __attribute__ ((format(printf, fmt, va))) #else #define PL_PRINTF(fmt, va) #endif #ifdef __unix__ #define PL_HAVE_UNIX #endif #ifdef _WIN32 #define PL_HAVE_WIN32 #endif #include "config_internal.h" #include "pl_assert.h" #include "pl_alloc.h" #include "pl_string.h" // Include all of the symbols that should be public in a way that marks them // as being externally visible. (Otherwise, all symbols are hidden by default) #pragma GCC visibility push(default) #include #undef PL_DEPRECATED #define PL_DEPRECATED #if PL_API_VER != BUILD_API_VER #error Header mismatch? pulled from elsewhere! #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PL_HAVE_LCMS #include "include/libplacebo/shaders/icc.h" #endif #ifdef PL_HAVE_VULKAN #include "include/libplacebo/vulkan.h" #endif #ifdef PL_HAVE_OPENGL #include "include/libplacebo/opengl.h" #endif #ifdef PL_HAVE_D3D11 #include "include/libplacebo/d3d11.h" #endif #pragma GCC visibility pop // Align up to the nearest multiple of an arbitrary alignment, which may also // be 0 to signal no alignment requirements. #define PL_ALIGN(x, align) ((align) ? ((x) + (align) - 1) / (align) * (align) : (x)) // This is faster but must only be called on positive powers of two. #define PL_ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) // Returns the log base 2 of an unsigned long long #define PL_LOG2(x) ((unsigned) (8*sizeof (unsigned long long) - __builtin_clzll((x)) - 1)) // Rounds a number up to the nearest power of two #define PL_ALIGN_POT(x) (0x1LLU << (PL_LOG2((x) - 1) + 1)) // Returns whether or not a number is a power of two (or zero) #define PL_ISPOT(x) (((x) & ((x) - 1)) == 0) // Returns the size of a static array with known size. #define PL_ARRAY_SIZE(s) (sizeof(s) / sizeof((s)[0])) // Swaps two variables #define PL_SWAP(a, b) \ do { \ __typeof__ (a) tmp = (a); \ (a) = (b); \ (b) = tmp; \ } while (0) // Helper functions for transposing a matrix in-place. #define PL_TRANSPOSE_DIM(d, m) \ pl_transpose((d), (float[(d)*(d)]){0}, (const float *)(m)) #define PL_TRANSPOSE_2X2(m) PL_TRANSPOSE_DIM(2, m) #define PL_TRANSPOSE_3X3(m) PL_TRANSPOSE_DIM(3, m) #define PL_TRANSPOSE_4X4(m) PL_TRANSPOSE_DIM(4, m) static inline float *pl_transpose(int dim, float *out, const float *in) { for (int i = 0; i < dim; i++) { for (int j = 0; j < dim; j++) out[i * dim + j] = in[j * dim + i]; } return out; } // Helper functions for some common numeric operations (careful: double-eval) #define PL_MAX(x, y) ((x) > (y) ? (x) : (y)) #define PL_MIN(x, y) ((x) < (y) ? (x) : (y)) #define PL_CLAMP(x, l, h) ((x) < (l) ? (l) : (x) > (h) ? (h) : (x)) #define PL_CMP(a, b) ((a) < (b) ? -1 : (a) > (b) ? 1 : 0) #define PL_DEF(x, d) ((x) ? 
(x) : (d)) #define PL_SQUARE(x) ((x) * (x)) #define PL_CUBE(x) ((x) * (x) * (x)) #define PL_MIX(a, b, x) ((x) * (b) + (1 - (x)) * (a)) // Helpers for doing alignment calculations static inline size_t pl_gcd(size_t x, size_t y) { assert(x && y); while (y) { size_t tmp = y; y = x % y; x = tmp; } return x; } static inline size_t pl_lcm(size_t x, size_t y) { assert(x && y); return x * (y / pl_gcd(x, y)); } // Error checking macro for stuff with integer errors, aborts on failure #define PL_CHECK_ERR(expr) \ do { \ int _ret = (expr); \ if (_ret) { \ fprintf(stderr, "libplacebo: internal error: %s (%s:%d)\n", \ strerror(_ret), __FILE__, __LINE__); \ abort(); \ } \ } while (0) // Refcounting helpers typedef _Atomic uint32_t pl_rc_t; #define pl_rc_init(rc) atomic_init(rc, 1) #define pl_rc_ref(rc) ((void) atomic_fetch_add_explicit(rc, 1, memory_order_acquire)) #define pl_rc_deref(rc) (atomic_fetch_sub_explicit(rc, 1, memory_order_release) == 1) #define pl_rc_count(rc) atomic_load(rc) #define pl_unreachable() (assert(!"unreachable"), __builtin_unreachable()) libplacebo-v4.192.1/src/d3d11/000077500000000000000000000000001417677245700156015ustar00rootroot00000000000000libplacebo-v4.192.1/src/d3d11/common.h000066400000000000000000000032421417677245700172430ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "../common.h" #include "../log.h" // Shared struct used to hold the D3D11 device and associated interfaces struct d3d11_ctx { pl_log log; pl_d3d11 d3d11; // Copy of the device from pl_d3d11 for convenience. Does not hold an // additional reference. ID3D11Device *dev; // DXGI device. This does hold a reference. IDXGIDevice1 *dxgi_dev; // Debug interfaces ID3D11Debug *debug; ID3D11InfoQueue *iqueue; uint64_t last_discarded; // Last count of discarded messages // pl_gpu_is_failed (We saw a device removed error!) bool is_failed; }; // Pointer to dxgi.dll!CreateDXGIFactory1() typedef HRESULT (WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory); // DDK value. Apparently some D3D functions can return this instead of the // proper user-mode error code. See: // https://docs.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgiswapchain-present #define D3DDDIERR_DEVICEREMOVED (0x88760870) libplacebo-v4.192.1/src/d3d11/context.c000066400000000000000000000351551417677245700174420ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" // Windows 8 enum value, not present in mingw-w64 v7 #define DXGI_ADAPTER_FLAG_SOFTWARE (2) const struct pl_d3d11_params pl_d3d11_default_params = { PL_D3D11_DEFAULTS }; static INIT_ONCE d3d11_once = INIT_ONCE_STATIC_INIT; static PFN_D3D11_CREATE_DEVICE pD3D11CreateDevice = NULL; static PFN_CREATE_DXGI_FACTORY pCreateDXGIFactory1 = NULL; static void d3d11_load(void) { BOOL bPending = FALSE; InitOnceBeginInitialize(&d3d11_once, 0, &bPending, NULL); if (bPending) { HMODULE d3d11 = LoadLibraryW(L"d3d11.dll"); if (d3d11) { pD3D11CreateDevice = (void *) GetProcAddress(d3d11, "D3D11CreateDevice"); } HMODULE dxgi = LoadLibraryW(L"dxgi.dll"); if (dxgi) { pCreateDXGIFactory1 = (void *) GetProcAddress(dxgi, "CreateDXGIFactory1"); } } InitOnceComplete(&d3d11_once, 0, NULL); } // Get a const array of D3D_FEATURE_LEVELs from max_fl to min_fl (inclusive) static int get_feature_levels(int max_fl, int min_fl, const D3D_FEATURE_LEVEL **out) { static const D3D_FEATURE_LEVEL levels[] = { D3D_FEATURE_LEVEL_12_1, D3D_FEATURE_LEVEL_12_0, D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0, D3D_FEATURE_LEVEL_9_3, D3D_FEATURE_LEVEL_9_2, D3D_FEATURE_LEVEL_9_1, }; static const int levels_len = PL_ARRAY_SIZE(levels); int start = 0; for (; start < levels_len; start++) { if (levels[start] <= max_fl) break; } int len = 0; for (; start + len < levels_len; len++) { if (levels[start + len] < min_fl) break; } *out = &levels[start]; return len; } static bool is_null_luid(LUID luid) { return luid.LowPart == 0 && luid.HighPart == 0; } static IDXGIAdapter *get_adapter(pl_d3d11 d3d11, LUID adapter_luid) { struct d3d11_ctx *ctx = PL_PRIV(d3d11); IDXGIFactory1 *factory = NULL; IDXGIAdapter1 *adapter1 = NULL; IDXGIAdapter *adapter = NULL; HRESULT hr; if (!pCreateDXGIFactory1) { PL_FATAL(ctx, "Failed to load dxgi.dll"); goto error; } pCreateDXGIFactory1(&IID_IDXGIFactory1, (void **) &factory); for (int i = 0;; i++) { hr = IDXGIFactory1_EnumAdapters1(factory, i, &adapter1); if (hr == DXGI_ERROR_NOT_FOUND) break; if (FAILED(hr)) { PL_FATAL(ctx, "Failed to enumerate adapters"); goto error; } DXGI_ADAPTER_DESC1 desc; D3D(IDXGIAdapter1_GetDesc1(adapter1, &desc)); if (desc.AdapterLuid.LowPart == adapter_luid.LowPart && desc.AdapterLuid.HighPart == adapter_luid.HighPart) { break; } SAFE_RELEASE(adapter1); } if (!adapter1) { PL_FATAL(ctx, "Adapter with LUID %08lx%08lx not found", adapter_luid.HighPart, adapter_luid.LowPart); goto error; } D3D(IDXGIAdapter1_QueryInterface(adapter1, &IID_IDXGIAdapter, (void **) &adapter)); error: SAFE_RELEASE(factory); SAFE_RELEASE(adapter1); return adapter; } static bool has_sdk_layers(void) { // This will fail if the SDK layers aren't installed return SUCCEEDED(pD3D11CreateDevice(NULL, D3D_DRIVER_TYPE_NULL, NULL, D3D11_CREATE_DEVICE_DEBUG, NULL, 0, D3D11_SDK_VERSION, NULL, NULL, NULL)); } static ID3D11Device *create_device(struct pl_d3d11 *d3d11, const struct pl_d3d11_params *params) { struct d3d11_ctx *ctx = PL_PRIV(d3d11); bool debug = params->debug; bool warp = params->force_software; int max_fl = params->max_feature_level; int min_fl = params->min_feature_level; ID3D11Device *dev = NULL; IDXGIDevice1 *dxgi_dev = NULL; IDXGIAdapter *adapter = NULL; bool release_adapter = false; HRESULT hr; d3d11_load(); if (!pD3D11CreateDevice) { PL_FATAL(ctx, "Failed to load d3d11.dll"); goto error; } if (params->adapter) { adapter = 
params->adapter; } else if (!is_null_luid(params->adapter_luid)) { adapter = get_adapter(d3d11, params->adapter_luid); release_adapter = true; } if (debug && !has_sdk_layers()) { PL_INFO(ctx, "Debug layer not available, removing debug flag"); debug = false; } // Return here to retry creating the device do { // Use these default feature levels if they are not set max_fl = PL_DEF(max_fl, D3D_FEATURE_LEVEL_12_1); min_fl = PL_DEF(min_fl, D3D_FEATURE_LEVEL_9_1); // Get a list of feature levels from min_fl to max_fl const D3D_FEATURE_LEVEL *levels; int levels_len = get_feature_levels(max_fl, min_fl, &levels); if (!levels_len) { PL_FATAL(ctx, "No suitable Direct3D feature level found"); goto error; } D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_UNKNOWN; if (!adapter) { if (warp) { type = D3D_DRIVER_TYPE_WARP; } else { type = D3D_DRIVER_TYPE_HARDWARE; } } UINT flags = params->flags; if (debug) flags |= D3D11_CREATE_DEVICE_DEBUG; hr = pD3D11CreateDevice(adapter, type, NULL, flags, levels, levels_len, D3D11_SDK_VERSION, &dev, NULL, NULL); if (SUCCEEDED(hr)) break; // Trying to create a D3D_FEATURE_LEVEL_12_0 device on Windows 8.1 or // below will not succeed. Try an 11_1 device. if (hr == E_INVALIDARG && max_fl >= D3D_FEATURE_LEVEL_12_0 && min_fl <= D3D_FEATURE_LEVEL_11_1) { PL_DEBUG(ctx, "Failed to create 12_0+ device, trying 11_1"); max_fl = D3D_FEATURE_LEVEL_11_1; continue; } // Trying to create a D3D_FEATURE_LEVEL_11_1 device on Windows 7 // without the platform update will not succeed. Try an 11_0 device. if (hr == E_INVALIDARG && max_fl >= D3D_FEATURE_LEVEL_11_1 && min_fl <= D3D_FEATURE_LEVEL_11_0) { PL_DEBUG(ctx, "Failed to create 11_1+ device, trying 11_0"); max_fl = D3D_FEATURE_LEVEL_11_0; continue; } // Retry with WARP if allowed if (!adapter && !warp && params->allow_software) { PL_DEBUG(ctx, "Failed to create hardware device, trying WARP: %s", pl_hresult_to_str(hr)); warp = true; max_fl = params->max_feature_level; min_fl = params->min_feature_level; continue; } PL_FATAL(ctx, "Failed to create Direct3D 11 device: %s", pl_hresult_to_str(hr)); goto error; } while (true); if (params->max_frame_latency) { D3D(ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void **) &dxgi_dev)); IDXGIDevice1_SetMaximumFrameLatency(dxgi_dev, params->max_frame_latency); } d3d11->software = warp; error: if (release_adapter) SAFE_RELEASE(adapter); SAFE_RELEASE(dxgi_dev); return dev; } static void init_debug_layer(struct d3d11_ctx *ctx) { D3D(ID3D11Device_QueryInterface(ctx->dev, &IID_ID3D11Debug, (void **) &ctx->debug)); D3D(ID3D11Device_QueryInterface(ctx->dev, &IID_ID3D11InfoQueue, (void **) &ctx->iqueue)); // Filter some annoying messages D3D11_MESSAGE_ID deny_ids[] = { // This false-positive error occurs every time we Draw() with a shader // that samples from a texture format that only supports point sampling. // Since we already use CheckFormatSupport to know which formats can be // linearly sampled from, we shouldn't ever bind a non-point sampler to // a format that doesn't support it. 
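        // (Note: this deny list is installed with PushStorageFilter below, i.e.
        // it is a *storage* filter: the listed IDs are dropped before they are
        // ever stored in the info queue, while every other message still
        // reaches the queue and is picked up by the pl_d3d11_flush_message_queue()
        // calls sprinkled throughout this backend.)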
D3D11_MESSAGE_ID_DEVICE_DRAW_RESOURCE_FORMAT_SAMPLE_UNSUPPORTED, }; D3D11_INFO_QUEUE_FILTER filter = { .DenyList = { .NumIDs = PL_ARRAY_SIZE(deny_ids), .pIDList = deny_ids, }, }; ID3D11InfoQueue_PushStorageFilter(ctx->iqueue, &filter); error: return; } void pl_d3d11_destroy(pl_d3d11 *ptr) { pl_d3d11 d3d11 = *ptr; if (!d3d11) return; struct d3d11_ctx *ctx = PL_PRIV(d3d11); pl_gpu_destroy(d3d11->gpu); SAFE_RELEASE(ctx->dev); SAFE_RELEASE(ctx->dxgi_dev); if (ctx->debug) { // Report any leaked objects pl_d3d11_flush_message_queue(ctx, "After destroy"); ID3D11Debug_ReportLiveDeviceObjects(ctx->debug, D3D11_RLDO_DETAIL); pl_d3d11_flush_message_queue(ctx, "After leak check"); ID3D11Debug_ReportLiveDeviceObjects(ctx->debug, D3D11_RLDO_SUMMARY); pl_d3d11_flush_message_queue(ctx, "After leak summary"); } SAFE_RELEASE(ctx->debug); SAFE_RELEASE(ctx->iqueue); pl_free_ptr((void **) ptr); } pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params) { params = PL_DEF(params, &pl_d3d11_default_params); IDXGIAdapter1 *adapter = NULL; IDXGIAdapter2 *adapter2 = NULL; bool success = false; HRESULT hr; struct pl_d3d11 *d3d11 = pl_zalloc_obj(NULL, d3d11, struct d3d11_ctx); struct d3d11_ctx *ctx = PL_PRIV(d3d11); ctx->log = log; ctx->d3d11 = d3d11; if (params->device) { d3d11->device = params->device; ID3D11Device_AddRef(d3d11->device); } else { d3d11->device = create_device(d3d11, params); if (!d3d11->device) goto error; } ctx->dev = d3d11->device; D3D(ID3D11Device_QueryInterface(d3d11->device, &IID_IDXGIDevice1, (void **) &ctx->dxgi_dev)); D3D(IDXGIDevice1_GetParent(ctx->dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter)); hr = IDXGIAdapter1_QueryInterface(adapter, &IID_IDXGIAdapter2, (void **) &adapter2); if (FAILED(hr)) adapter2 = NULL; if (adapter2) { PL_INFO(ctx, "Using DXGI 1.2+"); } else { PL_INFO(ctx, "Using DXGI 1.1"); } D3D_FEATURE_LEVEL fl = ID3D11Device_GetFeatureLevel(d3d11->device); PL_INFO(ctx, "Using Direct3D 11 feature level %u_%u", ((unsigned) fl) >> 12, (((unsigned) fl) >> 8) & 0xf); char *dev_name = NULL; UINT vendor_id, device_id, revision, subsys_id; LUID adapter_luid; UINT flags; if (adapter2) { // DXGI 1.2 IDXGIAdapter2::GetDesc2 is preferred over the DXGI 1.1 // version because it reports the real adapter information when using // feature level 9 hardware DXGI_ADAPTER_DESC2 desc; D3D(IDXGIAdapter2_GetDesc2(adapter2, &desc)); dev_name = pl_to_utf8(NULL, desc.Description); vendor_id = desc.VendorId; device_id = desc.DeviceId; revision = desc.Revision; subsys_id = desc.SubSysId; adapter_luid = desc.AdapterLuid; flags = desc.Flags; } else { DXGI_ADAPTER_DESC1 desc; D3D(IDXGIAdapter1_GetDesc1(adapter, &desc)); dev_name = pl_to_utf8(NULL, desc.Description); vendor_id = desc.VendorId; device_id = desc.DeviceId; revision = desc.Revision; subsys_id = desc.SubSysId; adapter_luid = desc.AdapterLuid; flags = desc.Flags; } PL_INFO(ctx, "Direct3D 11 device properties:"); PL_INFO(ctx, " Device Name: %s", dev_name); PL_INFO(ctx, " Device ID: %04x:%04x (rev %02x)", vendor_id, device_id, revision); PL_INFO(ctx, " Subsystem ID: %04x:%04x", LOWORD(subsys_id), HIWORD(subsys_id)); PL_INFO(ctx, " LUID: %08lx%08lx", adapter_luid.HighPart, adapter_luid.LowPart); pl_free(dev_name); LARGE_INTEGER version; hr = IDXGIAdapter1_CheckInterfaceSupport(adapter, &IID_IDXGIDevice, &version); if (SUCCEEDED(hr)) { PL_INFO(ctx, " Driver version: %u.%u.%u.%u", HIWORD(version.HighPart), LOWORD(version.HighPart), HIWORD(version.LowPart), LOWORD(version.LowPart)); } // Note: DXGI_ADAPTER_FLAG_SOFTWARE doesn't exist 
before Windows 8, but we // also set d3d11->software in create_device if we pick WARP ourselves if (flags & DXGI_ADAPTER_FLAG_SOFTWARE) d3d11->software = true; // If the primary display adapter is a software adapter, the // DXGI_ADAPTER_FLAG_SOFTWARE flag won't be set, but the device IDs should // still match the Microsoft Basic Render Driver if (vendor_id == 0x1414 && device_id == 0x8c) d3d11->software = true; if (d3d11->software) { bool external_adapter = params->device || params->adapter || !is_null_luid(params->adapter_luid); // The allow_software flag only applies if the API user didn't manually // specify an adapter or a device if (!params->allow_software && !external_adapter) { // If we got this far with allow_software set, the primary adapter // must be a software adapter PL_ERR(ctx, "Primary adapter is a software adapter"); goto error; } // If a software adapter was manually specified, don't show a warning enum pl_log_level level = PL_LOG_WARN; if (external_adapter || params->force_software) level = PL_LOG_INFO; PL_MSG(ctx, level, "Using a software adapter"); } // Init debug layer if (ID3D11Device_GetCreationFlags(d3d11->device) & D3D11_CREATE_DEVICE_DEBUG) init_debug_layer(ctx); d3d11->gpu = pl_gpu_create_d3d11(ctx); if (!d3d11->gpu) goto error; success = true; error: if (!success) { PL_FATAL(ctx, "Failed initializing Direct3D 11 device"); pl_d3d11_destroy((const struct pl_d3d11 **) &d3d11); } SAFE_RELEASE(adapter); SAFE_RELEASE(adapter2); return d3d11; } libplacebo-v4.192.1/src/d3d11/formats.c000066400000000000000000000253731417677245700174320ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "formats.h" #include "gpu.h" #define FMT(_minor, _name, _dxfmt, _type, num, size, bits, order) \ (struct d3d_format) { \ .dxfmt = DXGI_FORMAT_##_dxfmt##_##_type, \ .minor = _minor, \ .fmt = { \ .name = _name, \ .type = PL_FMT_##_type, \ .num_components = num, \ .component_depth = bits, \ .texel_size = size, \ .texel_align = 1, \ .internal_size = size, \ .host_bits = bits, \ .sample_order = order, \ }, \ } #define IDX(...) {__VA_ARGS__} #define BITS(...) 
{__VA_ARGS__} #define REGFMT(name, dxfmt, type, num, bits) \ FMT(0, name, dxfmt, type, num, (num) * (bits) / 8, \ BITS(bits, bits, bits, bits), \ IDX(0, 1, 2, 3)) const struct d3d_format pl_d3d11_formats[] = { REGFMT("r8", R8, UNORM, 1, 8), REGFMT("rg8", R8G8, UNORM, 2, 8), REGFMT("rgba8", R8G8B8A8, UNORM, 4, 8), REGFMT("r16", R16, UNORM, 1, 16), REGFMT("rg16", R16G16, UNORM, 2, 16), REGFMT("rgba16", R16G16B16A16, UNORM, 4, 16), REGFMT("r8s", R8, SNORM, 1, 8), REGFMT("rg8s", R8G8, SNORM, 2, 8), REGFMT("rgba8s", R8G8B8A8, SNORM, 4, 8), REGFMT("r16s", R16, SNORM, 1, 16), REGFMT("rg16s", R16G16, SNORM, 2, 16), REGFMT("rgba16s", R16G16B16A16, SNORM, 4, 16), REGFMT("r16hf", R16, FLOAT, 1, 16), REGFMT("rg16hf", R16G16, FLOAT, 2, 16), REGFMT("rgba16hf", R16G16B16A16, FLOAT, 4, 16), REGFMT("r32f", R32, FLOAT, 1, 32), REGFMT("rg32f", R32G32, FLOAT, 2, 32), REGFMT("rgb32f", R32G32B32, FLOAT, 3, 32), REGFMT("rgba32f", R32G32B32A32, FLOAT, 4, 32), REGFMT("r8u", R8, UINT, 1, 8), REGFMT("rg8u", R8G8, UINT, 2, 8), REGFMT("rgba8u", R8G8B8A8, UINT, 4, 8), REGFMT("r16u", R16, UINT, 1, 16), REGFMT("rg16u", R16G16, UINT, 2, 16), REGFMT("rgba16u", R16G16B16A16, UINT, 4, 16), REGFMT("r32u", R32, UINT, 1, 32), REGFMT("rg32u", R32G32, UINT, 2, 32), REGFMT("rgb32u", R32G32B32, UINT, 3, 32), REGFMT("rgba32u", R32G32B32A32, UINT, 4, 32), REGFMT("r8i", R8, SINT, 1, 8), REGFMT("rg8i", R8G8, SINT, 2, 8), REGFMT("rgba8i", R8G8B8A8, SINT, 4, 8), REGFMT("r16i", R16, SINT, 1, 16), REGFMT("rg16i", R16G16, SINT, 2, 16), REGFMT("rgba16i", R16G16B16A16, SINT, 4, 16), REGFMT("r32i", R32, SINT, 1, 32), REGFMT("rg32i", R32G32, SINT, 2, 32), REGFMT("rgb32i", R32G32B32, SINT, 3, 32), REGFMT("rgba32i", R32G32B32A32, SINT, 4, 32), FMT(0, "rgb10a2", R10G10B10A2, UNORM, 4, 4, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3)), FMT(0, "rgb10a2u", R10G10B10A2, UINT, 4, 4, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3)), FMT(0, "bgra8", B8G8R8A8, UNORM, 4, 4, BITS( 8, 8, 8, 8), IDX(2, 1, 0, 3)), FMT(0, "bgrx8", B8G8R8X8, UNORM, 3, 4, BITS( 8, 8, 8), IDX(2, 1, 0)), FMT(0, "rg11b10f", R11G11B10, FLOAT, 3, 4, BITS(11, 11, 10), IDX(0, 1, 2)), // D3D11.1 16-bit formats (resurrected D3D9 formats) FMT(1, "bgr565", B5G6R5, UNORM, 3, 2, BITS( 5, 6, 5), IDX(2, 1, 0)), FMT(1, "bgr5a1", B5G5R5A1, UNORM, 4, 2, BITS( 5, 5, 5, 1), IDX(2, 1, 0, 3)), FMT(1, "bgra4", B4G4R4A4, UNORM, 4, 2, BITS( 4, 4, 4, 4), IDX(2, 1, 0, 3)), {0} }; #undef BITS #undef IDX #undef REGFMT #undef FMT void pl_d3d11_setup_formats(struct pl_gpu *gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); PL_ARRAY(pl_fmt) formats = {0}; HRESULT hr; for (int i = 0; pl_d3d11_formats[i].dxfmt; i++) { const struct d3d_format *d3d_fmt = &pl_d3d11_formats[i]; // The Direct3D 11.0 debug layer will segfault if CheckFormatSupport is // called on a format it doesn't know about if (pl_d3d11_formats[i].minor > p->minor) continue; UINT sup = 0; hr = ID3D11Device_CheckFormatSupport(p->dev, d3d_fmt->dxfmt, &sup); if (FAILED(hr)) continue; D3D11_FEATURE_DATA_FORMAT_SUPPORT2 sup2 = { .InFormat = d3d_fmt->dxfmt }; ID3D11Device_CheckFeatureSupport(p->dev, D3D11_FEATURE_FORMAT_SUPPORT2, ², sizeof(sup2)); struct pl_fmt *fmt = pl_alloc_obj(gpu, fmt, struct d3d_fmt *); const struct d3d_format **fmtp = PL_PRIV(fmt); *fmt = d3d_fmt->fmt; *fmtp = d3d_fmt; // For sanity, clear the superfluous fields for (int j = fmt->num_components; j < 4; j++) { fmt->component_depth[j] = 0; fmt->sample_order[j] = 0; fmt->host_bits[j] = 0; } static const struct { enum pl_fmt_caps caps; UINT sup; UINT sup2; } support[] = { { .caps = PL_FMT_CAP_SAMPLEABLE, .sup 
= D3D11_FORMAT_SUPPORT_TEXTURE2D, }, { .caps = PL_FMT_CAP_STORABLE, // SHADER_LOAD is for readonly images, which can use a SRV .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW | D3D11_FORMAT_SUPPORT_SHADER_LOAD, .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE, }, { .caps = PL_FMT_CAP_READWRITE, .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW, .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD, }, { .caps = PL_FMT_CAP_LINEAR, .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_SHADER_SAMPLE, }, { .caps = PL_FMT_CAP_RENDERABLE, .sup = D3D11_FORMAT_SUPPORT_RENDER_TARGET, }, { .caps = PL_FMT_CAP_BLENDABLE, .sup = D3D11_FORMAT_SUPPORT_RENDER_TARGET | D3D11_FORMAT_SUPPORT_BLENDABLE, }, { .caps = PL_FMT_CAP_VERTEX, .sup = D3D11_FORMAT_SUPPORT_IA_VERTEX_BUFFER, }, { .caps = PL_FMT_CAP_TEXEL_UNIFORM, .sup = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_SHADER_LOAD, }, { .caps = PL_FMT_CAP_TEXEL_STORAGE, // SHADER_LOAD is for readonly buffers, which can use a SRV .sup = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW | D3D11_FORMAT_SUPPORT_SHADER_LOAD, .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE, }, { .caps = PL_FMT_CAP_HOST_READABLE, .sup = D3D11_FORMAT_SUPPORT_CPU_LOCKABLE, }, }; for (int j = 0; j < PL_ARRAY_SIZE(support); j++) { if ((sup & support[j].sup) == support[j].sup && (sup2.OutFormatSupport2 & support[j].sup2) == support[j].sup2) { fmt->caps |= support[j].caps; } } // PL_FMT_CAP_STORABLE implies compute shaders, so don't set it if we // don't have them if (!gpu->glsl.compute) fmt->caps &= ~PL_FMT_CAP_STORABLE; // PL_FMT_CAP_READWRITE implies PL_FMT_CAP_STORABLE if (!(fmt->caps & PL_FMT_CAP_STORABLE)) fmt->caps &= ~PL_FMT_CAP_READWRITE; // We can't sample from integer textures if (fmt->type == PL_FMT_UINT || fmt->type == PL_FMT_SINT) fmt->caps &= ~(PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR); // `fmt->gatherable` must have PL_FMT_CAP_SAMPLEABLE if ((fmt->caps & PL_FMT_CAP_SAMPLEABLE) && (sup & D3D11_FORMAT_SUPPORT_SHADER_GATHER)) { fmt->gatherable = true; } // PL_FMT_CAP_BLITTABLE implies support for stretching, flipping and // loose format conversion, which require a shader pass in D3D11 if (p->fl >= D3D_FEATURE_LEVEL_11_0) { // On >=FL11_0, we use a compute pass, which supports 1D and 3D // textures if (fmt->caps & PL_FMT_CAP_STORABLE) fmt->caps |= PL_FMT_CAP_BLITTABLE; } else { // On caps & req) == req) fmt->caps |= PL_FMT_CAP_BLITTABLE; } if (fmt->caps & (PL_FMT_CAP_VERTEX | PL_FMT_CAP_TEXEL_UNIFORM | PL_FMT_CAP_TEXEL_STORAGE)) { fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); pl_assert(fmt->glsl_type); } if (fmt->caps & (PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE)) fmt->glsl_format = pl_fmt_glsl_format(fmt, fmt->num_components); // If no caps, D3D11 only supports this for things we don't care about if (!fmt->caps) { pl_free(fmt); continue; } PL_ARRAY_APPEND(gpu, formats, fmt); } gpu->formats = formats.elem; gpu->num_formats = formats.num; } libplacebo-v4.192.1/src/d3d11/formats.h000066400000000000000000000021451417677245700174270ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" struct d3d_format { DXGI_FORMAT dxfmt; int minor; // The D3D11 minor version number which supports this format struct pl_fmt fmt; }; extern const struct d3d_format pl_d3d11_formats[]; static inline DXGI_FORMAT fmt_to_dxgi(pl_fmt fmt) { const struct d3d_format **fmtp = PL_PRIV(fmt); return (*fmtp)->dxfmt; } void pl_d3d11_setup_formats(struct pl_gpu *gpu); libplacebo-v4.192.1/src/d3d11/gpu.c000066400000000000000000000600721417677245700165450ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "gpu.h" #include "formats.h" #include "glsl/spirv.h" #define D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80) #define DXGI_ADAPTER_FLAG3_SUPPORT_MONITORED_FENCES (0x8) struct timer_query { ID3D11Query *ts_start; ID3D11Query *ts_end; ID3D11Query *disjoint; }; struct pl_timer { // Ring buffer of timer queries to use int current; int pending; struct timer_query queries[16]; }; void pl_d3d11_timer_start(pl_gpu gpu, pl_timer timer) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; if (!timer) return; struct timer_query *query = &timer->queries[timer->current]; // Create the query objects lazilly if (!query->ts_start) { D3D(ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &query->ts_start)); D3D(ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &query->ts_end)); // Measuring duration in D3D11 requires three queries: start and end // timestamp queries, and a disjoint query containing a flag which says // whether the timestamps are usable or if a discontinuity occurred // between them, like a change in power state or clock speed. The // disjoint query also contains the timer frequency, so the timestamps // are useless without it. 
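        // Illustrative, caller-side sketch of how these queries surface through
        // the public pl_timer API (not part of this file). It assumes the usual
        // pl_timer_create()/pl_timer_query()/pl_timer_destroy() entry points and
        // the `timer` field of the pass-run / transfer params; pl_timer_query()
        // returns 0 while the result is still pending (see d3d11_timer_query):
#if 0
        pl_timer t = pl_timer_create(gpu);
        // ... submit GPU work with e.g. `.timer = t` in the pass run params ...
        uint64_t ns = pl_timer_query(gpu, t); // 0 until a result is available
        if (ns)
            printf("GPU pass took %llu ns\n", (unsigned long long) ns);
        pl_timer_destroy(gpu, &t);
#endif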
D3D(ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP_DISJOINT }, &query->disjoint)); } // Query the start timestamp ID3D11DeviceContext_Begin(p->imm, (ID3D11Asynchronous *) query->disjoint); ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) query->ts_start); return; error: SAFE_RELEASE(query->ts_start); SAFE_RELEASE(query->ts_end); SAFE_RELEASE(query->disjoint); } void pl_d3d11_timer_end(pl_gpu gpu, pl_timer timer) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); if (!timer) return; struct timer_query *query = &timer->queries[timer->current]; // Even if timer_start and timer_end are called in-order, timer_start might // have failed to create the timer objects if (!query->ts_start) return; // Query the end timestamp ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) query->ts_end); ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) query->disjoint); // Advance to the next set of queries, for the next call to timer_start timer->current++; if (timer->current >= PL_ARRAY_SIZE(timer->queries)) timer->current = 0; // Wrap around // Increment the number of pending queries, unless the ring buffer is full, // in which case, timer->current now points to the oldest one, which will be // dropped and reused if (timer->pending < PL_ARRAY_SIZE(timer->queries)) timer->pending++; } static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq) { static const uint64_t ns_per_s = 1000000000llu; return timestamp / freq * ns_per_s + timestamp % freq * ns_per_s / freq; } static uint64_t d3d11_timer_query(pl_gpu gpu, pl_timer timer) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; HRESULT hr; for (; timer->pending > 0; timer->pending--) { int index = timer->current - timer->pending; if (index < 0) index += PL_ARRAY_SIZE(timer->queries); struct timer_query *query = &timer->queries[index]; UINT64 start, end; D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj; // Fetch the results of each query, or on S_FALSE, return 0 to indicate // the queries are still pending D3D(hr = ID3D11DeviceContext_GetData(p->imm, (ID3D11Asynchronous *) query->disjoint, &dj, sizeof(dj), D3D11_ASYNC_GETDATA_DONOTFLUSH)); if (hr == S_FALSE) return 0; D3D(hr = ID3D11DeviceContext_GetData(p->imm, (ID3D11Asynchronous *) query->ts_end, &end, sizeof(end), D3D11_ASYNC_GETDATA_DONOTFLUSH)); if (hr == S_FALSE) return 0; D3D(hr = ID3D11DeviceContext_GetData(p->imm, (ID3D11Asynchronous *) query->ts_start, &start, sizeof(start), D3D11_ASYNC_GETDATA_DONOTFLUSH)); if (hr == S_FALSE) return 0; // There was a discontinuity during the queries, so a timestamp can't be // produced. Skip it and try the next one. if (dj.Disjoint || !dj.Frequency) continue; // We got a result. Return it to the caller. 
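        // (The duration is clamped to at least 1 ns below, because a return
        // value of 0 is reserved to mean "result still pending" for callers.)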
timer->pending--; pl_d3d11_flush_message_queue(ctx, "After timer query"); uint64_t ns = timestamp_to_ns(end - start, dj.Frequency); return PL_MAX(ns, 1); error: // There was an error fetching the timer result, so skip it and try the // next one continue; } // No more unprocessed results return 0; } static void d3d11_timer_destroy(pl_gpu gpu, pl_timer timer) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; for (int i = 0; i < PL_ARRAY_SIZE(timer->queries); i++) { SAFE_RELEASE(timer->queries[i].ts_start); SAFE_RELEASE(timer->queries[i].ts_end); SAFE_RELEASE(timer->queries[i].disjoint); } pl_d3d11_flush_message_queue(ctx, "After timer destroy"); pl_free(timer); } static pl_timer d3d11_timer_create(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); if (!p->has_timestamp_queries) return NULL; struct pl_timer *timer = pl_alloc_ptr(NULL, timer); *timer = (struct pl_timer) {0}; return timer; } static int d3d11_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { // Vulkan-style binding, where all descriptors are in the same namespace, is // required to use SPIRV-Cross' HLSL resource mapping API, which targets // resources by binding number return 0; } static void d3d11_gpu_flush(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; ID3D11DeviceContext_Flush(p->imm); pl_d3d11_flush_message_queue(ctx, "After gpu flush"); } static void d3d11_gpu_finish(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; HRESULT hr; if (p->finish_fence) { p->finish_value++; D3D(ID3D11Fence_SetEventOnCompletion(p->finish_fence, p->finish_value, p->finish_event)); ID3D11DeviceContext4_Signal(p->imm4, p->finish_fence, p->finish_value); ID3D11DeviceContext_Flush(p->imm); WaitForSingleObject(p->finish_event, INFINITE); } else { ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) p->finish_query); // D3D11 doesn't have blocking queries, but it does have blocking // readback. As a performance hack to try to avoid polling, do a dummy // copy/readback between two buffers. Hopefully this will block until // all prior commands are finished. If it does, the first GetData call // will return a result and we won't have to poll. 
pl_buf_copy(gpu, p->finish_buf_dst, 0, p->finish_buf_src, 0, sizeof(uint32_t)); pl_buf_read(gpu, p->finish_buf_dst, 0, &(uint32_t) {0}, sizeof(uint32_t)); // Poll the event query until it completes for (;;) { BOOL idle; D3D(hr = ID3D11DeviceContext_GetData(p->imm, (ID3D11Asynchronous *) p->finish_query, &idle, sizeof(idle), 0)); if (hr == S_OK && idle) break; Sleep(1); } } pl_d3d11_flush_message_queue(ctx, "After gpu finish"); error: return; } static bool d3d11_gpu_is_failed(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; if (ctx->is_failed) return true; // GetDeviceRemovedReason returns S_OK if the device isn't removed HRESULT hr = ID3D11Device_GetDeviceRemovedReason(p->dev); if (FAILED(hr)) { ctx->is_failed = true; pl_d3d11_after_error(ctx, hr); } return ctx->is_failed; } static void d3d11_gpu_destroy(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); pl_buf_destroy(gpu, &p->finish_buf_src); pl_buf_destroy(gpu, &p->finish_buf_dst); pl_dispatch_destroy(&p->dp); // Release everything except the immediate context SAFE_RELEASE(p->dev); SAFE_RELEASE(p->dev1); SAFE_RELEASE(p->dev5); SAFE_RELEASE(p->imm1); SAFE_RELEASE(p->imm4); SAFE_RELEASE(p->vbuf.buf); SAFE_RELEASE(p->ibuf.buf); SAFE_RELEASE(p->rstate); SAFE_RELEASE(p->dsstate); for (int i = 0; i < PL_TEX_SAMPLE_MODE_COUNT; i++) { for (int j = 0; j < PL_TEX_ADDRESS_MODE_COUNT; j++) { SAFE_RELEASE(p->samplers[i][j]); } } SAFE_RELEASE(p->finish_fence); if (p->finish_event) CloseHandle(p->finish_event); SAFE_RELEASE(p->finish_query); // Destroy the immediate context synchronously so referenced objects don't // show up in the leak check ID3D11DeviceContext_ClearState(p->imm); ID3D11DeviceContext_Flush(p->imm); SAFE_RELEASE(p->imm); pl_free((void *) gpu); } pl_d3d11 pl_d3d11_get(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->destroy == d3d11_gpu_destroy) { struct pl_gpu_d3d11 *p = (struct pl_gpu_d3d11 *) impl; return p->ctx->d3d11; } return NULL; } static bool load_d3d_compiler(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); HMODULE d3dcompiler = NULL; static const struct { const wchar_t *name; bool inbox; } compiler_dlls[] = { // Try the inbox D3DCompiler first (Windows 8.1 and up) { .name = L"d3dcompiler_47.dll", .inbox = true }, // Check for a packaged version of d3dcompiler_47.dll { .name = L"d3dcompiler_47.dll" }, // Try d3dcompiler_46.dll from the Windows 8 SDK { .name = L"d3dcompiler_46.dll" }, // Try d3dcompiler_43.dll from the June 2010 DirectX SDK { .name = L"d3dcompiler_43.dll" }, }; for (int i = 0; i < PL_ARRAY_SIZE(compiler_dlls); i++) { if (compiler_dlls[i].inbox) { if (!IsWindows8Point1OrGreater()) continue; d3dcompiler = LoadLibraryExW(compiler_dlls[i].name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); } else { d3dcompiler = LoadLibraryW(compiler_dlls[i].name); } if (!d3dcompiler) continue; p->D3DCompile = (void *) GetProcAddress(d3dcompiler, "D3DCompile"); if (!p->D3DCompile) return false; p->d3d_compiler_ver = pl_get_dll_version(compiler_dlls[i].name); return true; } return false; } static struct pl_gpu_fns pl_fns_d3d11 = { .tex_create = pl_d3d11_tex_create, .tex_destroy = pl_d3d11_tex_destroy, .tex_invalidate = pl_d3d11_tex_invalidate, .tex_clear_ex = pl_d3d11_tex_clear_ex, .tex_blit = pl_d3d11_tex_blit, .tex_upload = pl_d3d11_tex_upload, .tex_download = pl_d3d11_tex_download, .buf_create = pl_d3d11_buf_create, .buf_destroy = pl_d3d11_buf_destroy, .buf_write = pl_d3d11_buf_write, .buf_read = pl_d3d11_buf_read, .buf_copy = pl_d3d11_buf_copy, .desc_namespace = 
d3d11_desc_namespace, .pass_create = pl_d3d11_pass_create, .pass_destroy = pl_d3d11_pass_destroy, .pass_run = pl_d3d11_pass_run, .timer_create = d3d11_timer_create, .timer_destroy = d3d11_timer_destroy, .timer_query = d3d11_timer_query, .gpu_flush = d3d11_gpu_flush, .gpu_finish = d3d11_gpu_finish, .gpu_is_failed = d3d11_gpu_is_failed, .destroy = d3d11_gpu_destroy, }; pl_gpu pl_gpu_create_d3d11(struct d3d11_ctx *ctx) { pl_assert(ctx->dev); IDXGIDevice1 *dxgi_dev = NULL; IDXGIAdapter1 *adapter = NULL; IDXGIAdapter4 *adapter4 = NULL; bool success = false; HRESULT hr; struct pl_gpu *gpu = pl_zalloc_obj(NULL, gpu, struct pl_gpu_d3d11); gpu->log = ctx->log; gpu->ctx = gpu->log; struct pl_gpu_d3d11 *p = PL_PRIV(gpu); *p = (struct pl_gpu_d3d11) { .ctx = ctx, .impl = pl_fns_d3d11, .dev = ctx->dev, .spirv = spirv_compiler_create(ctx->log), .vbuf.bind_flags = D3D11_BIND_VERTEX_BUFFER, .ibuf.bind_flags = D3D11_BIND_INDEX_BUFFER, }; if (!p->spirv) goto error; ID3D11Device_AddRef(p->dev); ID3D11Device_GetImmediateContext(p->dev, &p->imm); // Check D3D11.1 interfaces hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1, (void **) &p->dev1); if (SUCCEEDED(hr)) { p->minor = 1; ID3D11Device1_GetImmediateContext1(p->dev1, &p->imm1); } // Check D3D11.4 interfaces hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device5, (void **) &p->dev5); if (SUCCEEDED(hr)) { // There is no GetImmediateContext4 method hr = ID3D11DeviceContext_QueryInterface(p->imm, &IID_ID3D11DeviceContext4, (void **) &p->imm4); if (SUCCEEDED(hr)) p->minor = 4; } PL_INFO(gpu, "Using Direct3D 11.%d runtime", p->minor); D3D(ID3D11Device_QueryInterface(p->dev, &IID_IDXGIDevice1, (void **) &dxgi_dev)); D3D(IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter)); DXGI_ADAPTER_DESC1 adapter_desc = {0}; IDXGIAdapter1_GetDesc1(adapter, &adapter_desc); // No resource can be larger than max_res_size in bytes unsigned int max_res_size = PL_CLAMP( D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_B_TERM * adapter_desc.DedicatedVideoMemory, D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u, D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_C_TERM * 1024u * 1024u); gpu->glsl = (struct pl_glsl_version) { .version = 450, .vulkan = true, }; gpu->limits = (struct pl_gpu_limits) { .max_buf_size = max_res_size, .max_ssbo_size = max_res_size, .max_vbo_size = max_res_size, .align_vertex_stride = 1, // Make up some values .align_tex_xfer_offset = 32, .align_tex_xfer_pitch = 1, .fragment_queues = 1, }; p->fl = ID3D11Device_GetFeatureLevel(p->dev); // If we're not using FL9_x, we can use the same suballocated buffer as a // vertex buffer and index buffer if (p->fl >= D3D_FEATURE_LEVEL_10_0) p->vbuf.bind_flags |= D3D11_BIND_INDEX_BUFFER; if (p->fl >= D3D_FEATURE_LEVEL_10_0) { gpu->limits.max_ubo_size = D3D11_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * CBUF_ELEM; } else { // 10level9 restriction: // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d11-graphics-reference-10level9-context gpu->limits.max_ubo_size = 255 * CBUF_ELEM; } if (p->fl >= D3D_FEATURE_LEVEL_11_0) { gpu->limits.max_tex_1d_dim = D3D11_REQ_TEXTURE1D_U_DIMENSION; gpu->limits.max_tex_2d_dim = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; gpu->limits.max_tex_3d_dim = D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) { gpu->limits.max_tex_1d_dim = D3D10_REQ_TEXTURE1D_U_DIMENSION; gpu->limits.max_tex_2d_dim = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; gpu->limits.max_tex_3d_dim = D3D10_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; } else if (p->fl 
>= D3D_FEATURE_LEVEL_9_3) { gpu->limits.max_tex_2d_dim = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION; // Same limit as FL9_1 gpu->limits.max_tex_3d_dim = D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; } else { gpu->limits.max_tex_2d_dim = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION; gpu->limits.max_tex_3d_dim = D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; } if (p->fl >= D3D_FEATURE_LEVEL_10_0) { gpu->limits.max_buffer_texels = 1 << D3D11_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP; } if (p->fl >= D3D_FEATURE_LEVEL_11_0) { gpu->glsl.compute = true; gpu->limits.compute_queues = 1; // Set `gpu->limits.blittable_1d_3d`, since `pl_tex_blit_compute`, which // is used to emulate blits on 11_0 and up, supports 1D and 3D textures gpu->limits.blittable_1d_3d = true; gpu->glsl.max_shmem_size = D3D11_CS_TGSM_REGISTER_COUNT * sizeof(float); gpu->glsl.max_group_threads = D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; gpu->glsl.max_group_size[0] = D3D11_CS_THREAD_GROUP_MAX_X; gpu->glsl.max_group_size[1] = D3D11_CS_THREAD_GROUP_MAX_Y; gpu->glsl.max_group_size[2] = D3D11_CS_THREAD_GROUP_MAX_Z; gpu->limits.max_dispatch[0] = gpu->limits.max_dispatch[1] = gpu->limits.max_dispatch[2] = D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION; } if (p->fl >= D3D_FEATURE_LEVEL_11_0) { // The offset limits are defined by HLSL: // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/gather4-po--sm5---asm- gpu->glsl.min_gather_offset = -32; gpu->glsl.max_gather_offset = 31; } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) { // SM4.1 has no gather4_po, so the offset must be specified by an // immediate with a range of [-8, 7] // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/gather4--sm4-1---asm- // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sample--sm4---asm-#address-offset gpu->glsl.min_gather_offset = -8; gpu->glsl.max_gather_offset = 7; } if (p->fl >= D3D_FEATURE_LEVEL_10_0) { p->max_srvs = D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT; } else { // 10level9 restriction: // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d11-graphics-reference-10level9-context p->max_srvs = 8; } if (p->fl >= D3D_FEATURE_LEVEL_11_1) { p->max_uavs = D3D11_1_UAV_SLOT_COUNT; } else { p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT; } if (!load_d3d_compiler(gpu)) { PL_FATAL(gpu, "Could not find D3DCompiler DLL"); goto error; } PL_INFO(gpu, "D3DCompiler version: %u.%u.%u.%u", p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor, p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision); // Detect support for timestamp queries. Some FL9_x devices don't support them. 
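    // (Passing NULL as the output pointer makes CreateQuery merely validate the
    // query description: it returns S_FALSE instead of allocating a query
    // object, and SUCCEEDED() still treats that as success.)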
hr = ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, NULL); p->has_timestamp_queries = SUCCEEDED(hr); pl_d3d11_setup_formats(gpu); // The rasterizer state never changes, so create it here D3D11_RASTERIZER_DESC rdesc = { .FillMode = D3D11_FILL_SOLID, .CullMode = D3D11_CULL_NONE, .FrontCounterClockwise = FALSE, .DepthClipEnable = TRUE, // Required for 10level9 .ScissorEnable = TRUE, }; D3D(ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &p->rstate)); // The depth stencil state never changes either, and we only set it to turn // depth testing off so the debug layer doesn't complain about an unbound // depth buffer D3D11_DEPTH_STENCIL_DESC dsdesc = { .DepthEnable = FALSE, .DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL, .DepthFunc = D3D11_COMPARISON_LESS, .StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK, .StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK, .FrontFace = { .StencilFailOp = D3D11_STENCIL_OP_KEEP, .StencilDepthFailOp = D3D11_STENCIL_OP_KEEP, .StencilPassOp = D3D11_STENCIL_OP_KEEP, .StencilFunc = D3D11_COMPARISON_ALWAYS, }, .BackFace = { .StencilFailOp = D3D11_STENCIL_OP_KEEP, .StencilDepthFailOp = D3D11_STENCIL_OP_KEEP, .StencilPassOp = D3D11_STENCIL_OP_KEEP, .StencilFunc = D3D11_COMPARISON_ALWAYS, }, }; D3D(ID3D11Device_CreateDepthStencilState(p->dev, &dsdesc, &p->dsstate)); // Initialize the samplers for (int sample_mode = 0; sample_mode < PL_TEX_SAMPLE_MODE_COUNT; sample_mode++) { for (int address_mode = 0; address_mode < PL_TEX_ADDRESS_MODE_COUNT; address_mode++) { static const D3D11_TEXTURE_ADDRESS_MODE d3d_address_mode[] = { [PL_TEX_ADDRESS_CLAMP] = D3D11_TEXTURE_ADDRESS_CLAMP, [PL_TEX_ADDRESS_REPEAT] = D3D11_TEXTURE_ADDRESS_WRAP, [PL_TEX_ADDRESS_MIRROR] = D3D11_TEXTURE_ADDRESS_MIRROR, }; static const D3D11_FILTER d3d_filter[] = { [PL_TEX_SAMPLE_NEAREST] = D3D11_FILTER_MIN_MAG_MIP_POINT, [PL_TEX_SAMPLE_LINEAR] = D3D11_FILTER_MIN_MAG_MIP_LINEAR, }; D3D11_SAMPLER_DESC sdesc = { .AddressU = d3d_address_mode[address_mode], .AddressV = d3d_address_mode[address_mode], .AddressW = d3d_address_mode[address_mode], .ComparisonFunc = D3D11_COMPARISON_NEVER, .MinLOD = 0, .MaxLOD = D3D11_FLOAT32_MAX, .MaxAnisotropy = 1, .Filter = d3d_filter[sample_mode], }; D3D(ID3D11Device_CreateSamplerState(p->dev, &sdesc, &p->samplers[sample_mode][address_mode])); } } hr = IDXGIAdapter1_QueryInterface(adapter, &IID_IDXGIAdapter4, (void **) &adapter4); if (SUCCEEDED(hr)) { DXGI_ADAPTER_DESC3 adapter_desc3 = {0}; IDXGIAdapter4_GetDesc3(adapter4, &adapter_desc3); p->has_monitored_fences = adapter_desc3.Flags & DXGI_ADAPTER_FLAG3_SUPPORT_MONITORED_FENCES; } // Try to create a D3D11.4 fence object to wait on in pl_gpu_finish() if (p->dev5 && p->has_monitored_fences) { hr = ID3D11Device5_CreateFence(p->dev5, 0, D3D11_FENCE_FLAG_NONE, &IID_ID3D11Fence, (void **) p->finish_fence); if (SUCCEEDED(hr)) { p->finish_event = CreateEventW(NULL, FALSE, FALSE, NULL); if (!p->finish_event) { PL_ERR(gpu, "Failed to create finish() event"); goto error; } } } // If fences are not available, we will have to poll a event query instead if (!p->finish_fence) { // Buffers for dummy copy/readback (see d3d11_gpu_finish()) p->finish_buf_src = pl_buf_create(gpu, pl_buf_params( .size = sizeof(uint32_t), .drawable = true, // Make these vertex buffers for 10level9 .initial_data = &(uint32_t) {0x11223344}, )); p->finish_buf_dst = pl_buf_create(gpu, pl_buf_params( .size = sizeof(uint32_t), .host_readable = true, .drawable = true, )); D3D(ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { 
D3D11_QUERY_EVENT }, &p->finish_query)); } // Create the dispatch last, after any setup of `gpu` is done p->dp = pl_dispatch_create(ctx->log, gpu); pl_d3d11_flush_message_queue(ctx, "After gpu create"); success = true; error: SAFE_RELEASE(dxgi_dev); SAFE_RELEASE(adapter); if (success) { return pl_gpu_finalize(gpu); } else { d3d11_gpu_destroy(gpu); return NULL; } } libplacebo-v4.192.1/src/d3d11/gpu.h000066400000000000000000000151051417677245700165470ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include #include #include #include "../gpu.h" #include "common.h" #include "utils.h" pl_gpu pl_gpu_create_d3d11(struct d3d11_ctx *ctx); // --- pl_gpu internal structs and helpers // Size of one constant in a constant buffer #define CBUF_ELEM (sizeof(float[4])) struct d3d_stream_buf { UINT bind_flags; ID3D11Buffer *buf; size_t size; size_t used; unsigned int align; }; struct pl_gpu_d3d11 { struct pl_gpu_fns impl; struct d3d11_ctx *ctx; ID3D11Device *dev; ID3D11Device1 *dev1; ID3D11Device5 *dev5; ID3D11DeviceContext *imm; ID3D11DeviceContext1 *imm1; ID3D11DeviceContext4 *imm4; // The Direct3D 11 minor version number int minor; struct spirv_compiler *spirv; pD3DCompile D3DCompile; struct dll_version d3d_compiler_ver; // Device capabilities D3D_FEATURE_LEVEL fl; bool has_timestamp_queries; bool has_monitored_fences; int max_srvs; int max_uavs; // This is a pl_dispatch used on ourselves for the purposes of dispatching // shaders for performing various emulation tasks (e.g. blits). // Warning: As in pl_vk, care must be taken to avoid recursive calls. 
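    // (That is: while one of these internal emulation shaders is being
    // dispatched, the code it triggers must not itself require dispatching
    // another helper through this same pl_dispatch.)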
struct pl_dispatch *dp; // Streaming vertex and index buffers struct d3d_stream_buf vbuf; struct d3d_stream_buf ibuf; // Shared rasterizer state ID3D11RasterizerState *rstate; // Shared depth-stencil state ID3D11DepthStencilState *dsstate; // Array of ID3D11SamplerStates for every combination of sample/address modes ID3D11SamplerState *samplers[PL_TEX_SAMPLE_MODE_COUNT][PL_TEX_ADDRESS_MODE_COUNT]; // Resources for finish() ID3D11Fence *finish_fence; uint64_t finish_value; HANDLE finish_event; ID3D11Query *finish_query; pl_buf finish_buf_src; pl_buf finish_buf_dst; }; void pl_d3d11_setup_formats(struct pl_gpu *gpu); void pl_d3d11_timer_start(pl_gpu gpu, pl_timer timer); void pl_d3d11_timer_end(pl_gpu gpu, pl_timer timer); struct pl_buf_d3d11 { ID3D11Buffer *buf; ID3D11Buffer *staging; ID3D11ShaderResourceView *raw_srv; ID3D11UnorderedAccessView *raw_uav; ID3D11ShaderResourceView *texel_srv; ID3D11UnorderedAccessView *texel_uav; char *data; bool dirty; }; void pl_d3d11_buf_destroy(pl_gpu gpu, pl_buf buf); pl_buf pl_d3d11_buf_create(pl_gpu gpu, const struct pl_buf_params *params); void pl_d3d11_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data, size_t size); bool pl_d3d11_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest, size_t size); void pl_d3d11_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size); // Ensure a buffer is up-to-date with its system memory mirror before it is used void pl_d3d11_buf_resolve(pl_gpu gpu, pl_buf buf); struct pl_tex_d3d11 { // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not // hold an additional reference to the texture object. ID3D11Resource *res; ID3D11Texture1D *tex1d; ID3D11Texture2D *tex2d; ID3D11Texture3D *tex3d; int array_slice; // Mirrors one of staging1d, staging2d, or staging3d, and doesn't hold a ref ID3D11Resource *staging; // Staging textures for pl_tex_download ID3D11Texture1D *staging1d; ID3D11Texture2D *staging2d; ID3D11Texture3D *staging3d; ID3D11ShaderResourceView *srv; ID3D11RenderTargetView *rtv; ID3D11UnorderedAccessView *uav; }; void pl_d3d11_tex_destroy(pl_gpu gpu, pl_tex tex); pl_tex pl_d3d11_tex_create(pl_gpu gpu, const struct pl_tex_params *params); void pl_d3d11_tex_invalidate(pl_gpu gpu, pl_tex tex); void pl_d3d11_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color); void pl_d3d11_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params); bool pl_d3d11_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params); bool pl_d3d11_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params); // Constant buffer layout used for gl_NumWorkGroups emulation struct d3d_num_workgroups_buf { alignas(CBUF_ELEM) uint32_t num_wgs[3]; }; enum { HLSL_BINDING_NOT_USED = -1, // Slot should always be bound as NULL HLSL_BINDING_NUM_WORKGROUPS = -2, // Slot used for gl_NumWorkGroups emulation }; // Represents a specific shader stage in a pl_pass (VS, PS, CS) struct d3d_pass_stage { // Lists for each resource type, to simplify binding in pl_pass_run. Indexes // match the index of the arrays passed to the ID3D11DeviceContext methods. // Entries are the index of pass->params.descriptors which should be bound // in that position, or a HLSL_BINDING_* special value. 
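    // Rough illustration of how pl_d3d11_pass_run() consumes these arrays
    // (hypothetical helper name `resource_for_binding`, local names purely for
    // illustration):
    //
    //     for (int i = 0; i < stage->cbvs.num; i++) {
    //         int idx = stage->cbvs.elem[i];
    //         cbv_arr[i] = (idx == HLSL_BINDING_NOT_USED)       ? NULL
    //                    : (idx == HLSL_BINDING_NUM_WORKGROUPS) ? num_workgroups_buf
    //                    : resource_for_binding(idx);
    //     }
    //     ID3D11DeviceContext_VSSetConstantBuffers(imm, 0, stage->cbvs.num, cbv_arr);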
PL_ARRAY(int) cbvs; PL_ARRAY(int) srvs; PL_ARRAY(int) samplers; }; struct pl_pass_d3d11 { ID3D11PixelShader *ps; ID3D11VertexShader *vs; ID3D11ComputeShader *cs; ID3D11InputLayout *layout; ID3D11BlendState *bstate; // gl_NumWorkGroups emulation struct d3d_num_workgroups_buf last_num_wgs; ID3D11Buffer *num_workgroups_buf; bool num_workgroups_used; // Maximum binding number int max_binding; struct d3d_pass_stage main; // PS and CS struct d3d_pass_stage vertex; // List of resources, as in `struct pass_stage`, except UAVs are shared // between all shader stages PL_ARRAY(int) uavs; // Pre-allocated resource arrays to use in pl_pass_run ID3D11Buffer **cbv_arr; ID3D11ShaderResourceView **srv_arr; ID3D11SamplerState **sampler_arr; ID3D11UnorderedAccessView **uav_arr; }; void pl_d3d11_pass_destroy(pl_gpu gpu, pl_pass pass); const struct pl_pass *pl_d3d11_pass_create(pl_gpu gpu, const struct pl_pass_params *params); void pl_d3d11_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params); libplacebo-v4.192.1/src/d3d11/gpu_buf.c000066400000000000000000000242421417677245700174000ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "formats.h" void pl_d3d11_buf_destroy(pl_gpu gpu, pl_buf buf) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); SAFE_RELEASE(buf_p->buf); SAFE_RELEASE(buf_p->staging); SAFE_RELEASE(buf_p->raw_srv); SAFE_RELEASE(buf_p->raw_uav); SAFE_RELEASE(buf_p->texel_srv); SAFE_RELEASE(buf_p->texel_uav); pl_d3d11_flush_message_queue(ctx, "After buffer destroy"); pl_free((void *) buf); } pl_buf pl_d3d11_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_buf *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_d3d11); buf->params = *params; buf->params.initial_data = NULL; struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); D3D11_BUFFER_DESC desc = { .ByteWidth = params->size }; if (params->uniform && !params->format && (params->storable || params->drawable)) { // TODO: Figure out what to do with these PL_ERR(gpu, "Uniform buffers cannot share any other buffer type"); goto error; } // TODO: Distinguish between uniform buffers and texel uniform buffers. // Currently we assume that if uniform and format are set, it's a texel // buffer and NOT a uniform buffer. 
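    //
    // Illustrative summary of the mapping implemented below:
    //     uniform && !format -> D3D11_BIND_CONSTANT_BUFFER, size padded to CBUF_ELEM
    //     uniform && format  -> D3D11_BIND_SHADER_RESOURCE (texel uniform buffer)
    //     storable           -> D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE,
    //                           with raw (byte-addressed) views
    //     drawable           -> D3D11_BIND_VERTEX_BUFFER (plus D3D11_BIND_INDEX_BUFFER
    //                           above FL9_3)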
if (params->uniform && !params->format) { desc.BindFlags |= D3D11_BIND_CONSTANT_BUFFER; desc.ByteWidth = PL_ALIGN2(desc.ByteWidth, CBUF_ELEM); } if (params->uniform && params->format) { desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE; } if (params->storable) { desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; desc.ByteWidth = PL_ALIGN2(desc.ByteWidth, sizeof(float)); desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; } if (params->drawable) { desc.BindFlags |= D3D11_BIND_VERTEX_BUFFER; // In FL9_x, a vertex buffer can't also be an index buffer, so index // buffers are unsupported in FL9_x for now if (p->fl > D3D_FEATURE_LEVEL_9_3) desc.BindFlags |= D3D11_BIND_INDEX_BUFFER; } char *data = NULL; // D3D11 doesn't allow partial constant buffer updates without special // conditions. To support partial buffer updates, keep a mirror of the // buffer data in system memory and upload the whole thing before the buffer // is used. // // Note: We don't use a staging buffer for this because of Intel. // https://github.com/mpv-player/mpv/issues/5293 // https://crbug.com/593024 if (params->uniform && !params->format && params->host_writable) { data = pl_zalloc(buf, desc.ByteWidth); buf_p->data = data; } D3D11_SUBRESOURCE_DATA srdata = { 0 }; if (params->initial_data) { if (desc.ByteWidth != params->size) { // If the size had to be rounded-up, uploading from // params->initial_data is technically undefined behavior, so copy // the initial data to an allocation first if (!data) data = pl_zalloc(buf, desc.ByteWidth); srdata.pSysMem = data; } else { srdata.pSysMem = params->initial_data; } if (data) memcpy(data, params->initial_data, params->size); } D3D(ID3D11Device_CreateBuffer(p->dev, &desc, params->initial_data ? &srdata : NULL, &buf_p->buf)); if (!buf_p->data) pl_free(data); // Create raw views for PL_DESC_BUF_STORAGE if (params->storable) { // A SRV is used for PL_DESC_ACCESS_READONLY D3D11_SHADER_RESOURCE_VIEW_DESC sdesc = { .Format = DXGI_FORMAT_R32_TYPELESS, .ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX, .BufferEx = { .NumElements = PL_ALIGN2(buf->params.size, sizeof(float)) / sizeof(float), .Flags = D3D11_BUFFEREX_SRV_FLAG_RAW, }, }; D3D(ID3D11Device_CreateShaderResourceView(p->dev, (ID3D11Resource *) buf_p->buf, &sdesc, &buf_p->raw_srv)); // A UAV is used for all other access modes D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { .Format = DXGI_FORMAT_R32_TYPELESS, .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, .Buffer = { .NumElements = PL_ALIGN2(buf->params.size, sizeof(float)) / sizeof(float), .Flags = D3D11_BUFFER_UAV_FLAG_RAW, }, }; D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, (ID3D11Resource *) buf_p->buf, &udesc, &buf_p->raw_uav)); } // Create a typed SRV for PL_BUF_TEXEL_UNIFORM and PL_BUF_TEXEL_STORAGE if (params->format) { D3D11_SHADER_RESOURCE_VIEW_DESC sdesc = { .Format = fmt_to_dxgi(params->format), .ViewDimension = D3D11_SRV_DIMENSION_BUFFER, }; D3D(ID3D11Device_CreateShaderResourceView(p->dev, (ID3D11Resource *) buf_p->buf, &sdesc, &buf_p->texel_srv)); // Create a typed UAV for PL_BUF_TEXEL_STORAGE if (params->storable) { D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { .Format = fmt_to_dxgi(buf->params.format), .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, }; D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, (ID3D11Resource *) buf_p->buf, &udesc, &buf_p->texel_uav)); } } if (!buf_p->data) { // Create the staging buffer regardless of whether params->host_readable // is set or not, so that buf_copy can copy to system-memory-backed // buffers // TODO: Consider 
sharing a big staging buffer for this, rather than // having one staging buffer per buffer desc.BindFlags = 0; desc.MiscFlags = 0; desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc.Usage = D3D11_USAGE_STAGING; D3D(ID3D11Device_CreateBuffer(p->dev, &desc, NULL, &buf_p->staging)); } pl_d3d11_flush_message_queue(ctx, "After buffer create"); return buf; error: pl_d3d11_buf_destroy(gpu, buf); return NULL; } void pl_d3d11_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data, size_t size) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); if (buf_p->data) { memcpy(buf_p->data + offset, data, size); buf_p->dirty = true; } else { ID3D11DeviceContext_UpdateSubresource(p->imm, (ID3D11Resource *) buf_p->buf, 0, (&(D3D11_BOX) { .left = offset, .top = 0, .front = 0, .right = offset + size, .bottom = 1, .back = 1, }), data, 0, 0); } } void pl_d3d11_buf_resolve(pl_gpu gpu, pl_buf buf) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); if (!buf_p->data || !buf_p->dirty) return; ID3D11DeviceContext_UpdateSubresource(p->imm, (ID3D11Resource *) buf_p->buf, 0, NULL, buf_p->data, 0, 0); } bool pl_d3d11_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest, size_t size) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); // If there is a system-memory mirror of the buffer contents, use it if (buf_p->data) { memcpy(dest, buf_p->data + offset, size); return true; } ID3D11DeviceContext_CopyResource(p->imm, (ID3D11Resource *) buf_p->staging, (ID3D11Resource *) buf_p->buf); D3D11_MAPPED_SUBRESOURCE lock; D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) buf_p->staging, 0, D3D11_MAP_READ, 0, &lock)); char *csrc = lock.pData; memcpy(dest, csrc + offset, size); ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource *) buf_p->staging, 0); pl_d3d11_flush_message_queue(ctx, "After buffer read"); return true; error: return false; } void pl_d3d11_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_buf_d3d11 *src_p = PL_PRIV(src); struct pl_buf_d3d11 *dst_p = PL_PRIV(dst); // Handle system memory copies in case one or both of the buffers has a // system memory mirror if (src_p->data && dst_p->data) { memcpy(dst_p->data + dst_offset, src_p->data + src_offset, size); dst_p->dirty = true; } else if (src_p->data) { pl_d3d11_buf_write(gpu, dst, dst_offset, src_p->data + src_offset, size); } else if (dst_p->data) { if (pl_d3d11_buf_read(gpu, src, src_offset, dst_p->data + dst_offset, size)) { dst_p->dirty = true; } else { PL_ERR(gpu, "Failed to read from GPU during buffer copy"); } } else { ID3D11DeviceContext_CopySubresourceRegion(p->imm, (ID3D11Resource *) dst_p->buf, 0, dst_offset, 0, 0, (ID3D11Resource *) src_p->buf, 0, (&(D3D11_BOX) { .left = src_offset, .top = 0, .front = 0, .right = src_offset + size, .bottom = 1, .back = 1, })); } pl_d3d11_flush_message_queue(ctx, "After buffer copy"); } libplacebo-v4.192.1/src/d3d11/gpu_pass.c000066400000000000000000001371501417677245700175750ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "formats.h" #include "glsl/spirv.h" struct stream_buf_slice { const void *data; unsigned int size; unsigned int offset; }; // Upload one or more slices of single-use data to a suballocated dynamic // buffer. Only call this once per-buffer per-pass, since it will discard or // reallocate the buffer when full. static bool stream_buf_upload(pl_gpu gpu, struct d3d_stream_buf *stream, struct stream_buf_slice *slices, int num_slices) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; unsigned int align = PL_DEF(stream->align, sizeof(float)); // Get total size, rounded up to the buffer's alignment size_t size = 0; for (int i = 0; i < num_slices; i++) size += PL_ALIGN2(slices[i].size, align); if (size > gpu->limits.max_buf_size) { PL_ERR(gpu, "Streaming buffer is too large"); return -1; } // If the data doesn't fit, realloc the buffer if (size > stream->size) { size_t new_size = stream->size; // Arbitrary base size if (!new_size) new_size = 16 * 1024; while (new_size < size) new_size *= 2; new_size = PL_MIN(new_size, gpu->limits.max_buf_size); ID3D11Buffer *new_buf; D3D11_BUFFER_DESC vbuf_desc = { .ByteWidth = new_size, .Usage = D3D11_USAGE_DYNAMIC, .BindFlags = stream->bind_flags, .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, }; D3D(ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf)); SAFE_RELEASE(stream->buf); stream->buf = new_buf; stream->size = new_size; stream->used = 0; } bool discard = false; size_t offset = stream->used; if (offset + size > stream->size) { // We reached the end of the buffer, so discard and wrap around discard = true; offset = 0; } D3D11_MAPPED_SUBRESOURCE map = {0}; UINT type = discard ? 
D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE; D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) stream->buf, 0, type, 0, &map)); // Upload each slice char *cdata = map.pData; stream->used = offset; for (int i = 0; i < num_slices; i++) { slices[i].offset = stream->used; memcpy(cdata + slices[i].offset, slices[i].data, slices[i].size); stream->used += PL_ALIGN2(slices[i].size, align); } ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource *) stream->buf, 0); return true; error: return false; } static const char *get_shader_target(pl_gpu gpu, enum glsl_shader_stage stage) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); switch (p->fl) { default: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_5_0"; case GLSL_SHADER_FRAGMENT: return "ps_5_0"; case GLSL_SHADER_COMPUTE: return "cs_5_0"; } break; case D3D_FEATURE_LEVEL_10_1: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_4_1"; case GLSL_SHADER_FRAGMENT: return "ps_4_1"; case GLSL_SHADER_COMPUTE: return "cs_4_1"; } break; case D3D_FEATURE_LEVEL_10_0: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_4_0"; case GLSL_SHADER_FRAGMENT: return "ps_4_0"; case GLSL_SHADER_COMPUTE: return "cs_4_0"; } break; case D3D_FEATURE_LEVEL_9_3: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_3"; case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3"; case GLSL_SHADER_COMPUTE: return NULL; } break; case D3D_FEATURE_LEVEL_9_2: case D3D_FEATURE_LEVEL_9_1: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_1"; case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_1"; case GLSL_SHADER_COMPUTE: return NULL; } break; } return NULL; } static SpvExecutionModel stage_to_spv(enum glsl_shader_stage stage) { static const SpvExecutionModel spv_execution_model[] = { [GLSL_SHADER_VERTEX] = SpvExecutionModelVertex, [GLSL_SHADER_FRAGMENT] = SpvExecutionModelFragment, [GLSL_SHADER_COMPUTE] = SpvExecutionModelGLCompute, }; return spv_execution_model[stage]; } #define SC(cmd) \ do { \ spvc_result res = (cmd); \ if (res != SPVC_SUCCESS) { \ PL_ERR(gpu, "%s: %s (%d) (%s:%d)", \ #cmd, sc ? spvc_context_get_last_error_string(sc) : "", \ res, __FILE__, __LINE__); \ goto error; \ } \ } while (0) // Some decorations, like SpvDecorationNonWritable, are actually found on the // members of a buffer block, rather than the buffer block itself. If all // members have a certain decoration, SPIRV-Cross considers it to apply to the // buffer block too, which determines things like whether a SRV or UAV is used // for an SSBO. This function checks if SPIRV-Cross considers a decoration to // apply to a buffer block. 
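// For example (illustrative GLSL, not taken from the actual shader sources):
// a block declared as
//     layout(std430, binding = 1) readonly buffer my_ssbo { float data[]; };
// carries SpvDecorationNonWritable on its members, so SPIRV-Cross treats the
// whole block as non-writable, and alloc_hlsl_reg_bindings() below can bind
// it as an SRV (t# register) rather than a UAV (u# register).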
static spvc_result buffer_block_has_decoration(spvc_compiler sc_comp, spvc_variable_id id, SpvDecoration decoration, bool *out) { const SpvDecoration *decorations; size_t num_decorations = 0; spvc_result res = spvc_compiler_get_buffer_block_decorations(sc_comp, id, &decorations, &num_decorations); if (res != SPVC_SUCCESS) return res; for (size_t j = 0; j < num_decorations; j++) { if (decorations[j] == decoration) { *out = true; return res; } } *out = false; return res; } static bool alloc_hlsl_reg_bindings(pl_gpu gpu, pl_pass pass, struct d3d_pass_stage *pass_s, spvc_context sc, spvc_compiler sc_comp, spvc_resources resources, spvc_resource_type res_type, enum glsl_shader_stage stage) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); const spvc_reflected_resource *res_list; size_t res_count; SC(spvc_resources_get_resource_list_for_type(resources, res_type, &res_list, &res_count)); // In a raster pass, one of the UAV slots is used by the runtime for the RTV int uav_offset = stage == GLSL_SHADER_COMPUTE ? 0 : 1; int max_uavs = p->max_uavs - uav_offset; for (int i = 0; i < res_count; i++) { unsigned int binding = spvc_compiler_get_decoration(sc_comp, res_list[i].id, SpvDecorationBinding); unsigned int descriptor_set = spvc_compiler_get_decoration(sc_comp, res_list[i].id, SpvDecorationDescriptorSet); if (descriptor_set != 0) continue; pass_p->max_binding = PL_MAX(pass_p->max_binding, binding); spvc_hlsl_resource_binding hlslbind; spvc_hlsl_resource_binding_init(&hlslbind); hlslbind.stage = stage_to_spv(stage); hlslbind.binding = binding; hlslbind.desc_set = descriptor_set; bool has_cbv = false, has_sampler = false, has_srv = false, has_uav = false; switch (res_type) { case SPVC_RESOURCE_TYPE_UNIFORM_BUFFER: has_cbv = true; break; case SPVC_RESOURCE_TYPE_STORAGE_BUFFER:; bool non_writable_bb = false; SC(buffer_block_has_decoration(sc_comp, res_list[i].id, SpvDecorationNonWritable, &non_writable_bb)); if (non_writable_bb) { has_srv = true; } else { has_uav = true; } break; case SPVC_RESOURCE_TYPE_STORAGE_IMAGE:; bool non_writable = spvc_compiler_has_decoration(sc_comp, res_list[i].id, SpvDecorationNonWritable); if (non_writable) { has_srv = true; } else { has_uav = true; } break; case SPVC_RESOURCE_TYPE_SEPARATE_IMAGE: has_srv = true; break; case SPVC_RESOURCE_TYPE_SAMPLED_IMAGE:; spvc_type type = spvc_compiler_get_type_handle(sc_comp, res_list[i].type_id); SpvDim dimension = spvc_type_get_image_dimension(type); // Uniform texel buffers are technically sampled images, but they // aren't sampled from, so don't allocate a sampler if (dimension != SpvDimBuffer) has_sampler = true; has_srv = true; break; default: break; } if (has_cbv) { hlslbind.cbv.register_binding = pass_s->cbvs.num; PL_ARRAY_APPEND(pass, pass_s->cbvs, binding); if (pass_s->cbvs.num > D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) { PL_ERR(gpu, "Too many constant buffers in shader"); goto error; } } if (has_sampler) { hlslbind.sampler.register_binding = pass_s->samplers.num; PL_ARRAY_APPEND(pass, pass_s->samplers, binding); if (pass_s->srvs.num > D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT) { PL_ERR(gpu, "Too many samplers in shader"); goto error; } } if (has_srv) { hlslbind.srv.register_binding = pass_s->srvs.num; PL_ARRAY_APPEND(pass, pass_s->srvs, binding); if (pass_s->srvs.num > p->max_srvs) { PL_ERR(gpu, "Too many SRVs in shader"); goto error; } } if (has_uav) { // UAV registers are shared between the vertex and fragment shaders // in a raster pass, so check if the UAV for this resource has 
// already been allocated bool uav_bound = false; for (int j = 0; j < pass_p->uavs.num; j++) { if (pass_p->uavs.elem[i] == binding) uav_bound = true; } if (!uav_bound) { hlslbind.uav.register_binding = pass_p->uavs.num + uav_offset; PL_ARRAY_APPEND(pass, pass_p->uavs, binding); if (pass_p->uavs.num > max_uavs) { PL_ERR(gpu, "Too many UAVs in shader"); goto error; } } } SC(spvc_compiler_hlsl_add_resource_binding(sc_comp, &hlslbind)); } return true; error: return false; } static const char *shader_names[] = { [GLSL_SHADER_VERTEX] = "vertex", [GLSL_SHADER_FRAGMENT] = "fragment", [GLSL_SHADER_COMPUTE] = "compute", }; static ID3DBlob *shader_compile_glsl(pl_gpu gpu, pl_pass pass, struct d3d_pass_stage *pass_s, enum glsl_shader_stage stage, const char *glsl) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); void *tmp = pl_tmp(NULL); spvc_context sc = NULL; spvc_compiler sc_comp = NULL; const char *hlsl = NULL; ID3DBlob *out = NULL; ID3DBlob *errors = NULL; HRESULT hr; clock_t start = clock(); pl_str spirv = spirv_compile_glsl(p->spirv, tmp, &gpu->glsl, stage, glsl); if (!spirv.len) goto error; clock_t after_glsl = clock(); pl_log_cpu_time(gpu->log, start, after_glsl, "translating GLSL to SPIR-V"); SC(spvc_context_create(&sc)); spvc_parsed_ir sc_ir; SC(spvc_context_parse_spirv(sc, (SpvId *) spirv.buf, spirv.len / sizeof(SpvId), &sc_ir)); SC(spvc_context_create_compiler(sc, SPVC_BACKEND_HLSL, sc_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &sc_comp)); spvc_compiler_options sc_opts; SC(spvc_compiler_create_compiler_options(sc_comp, &sc_opts)); int sc_shader_model; if (p->fl >= D3D_FEATURE_LEVEL_11_0) { sc_shader_model = 50; } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) { sc_shader_model = 41; } else { sc_shader_model = 40; } SC(spvc_compiler_options_set_uint(sc_opts, SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model)); // Unlike Vulkan and OpenGL, in D3D11, the clip-space is "flipped" with // respect to framebuffer-space. In other words, if you render to a pixel at // (0, -1), you have to sample from (0, 1) to get the value back. We unflip // it by setting the following option, which inserts the equivalent of // `gl_Position.y = -gl_Position.y` into the vertex shader if (stage == GLSL_SHADER_VERTEX) { SC(spvc_compiler_options_set_bool(sc_opts, SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE)); } // Bind readonly images and imageBuffers as SRVs. This is done because a lot // of hardware (especially FL11_x hardware) has very poor format support for // reading values from UAVs. It allows the common case of readonly and // writeonly images to support more formats, though the less common case of // readwrite images still requires format support for UAV loads (represented // by the PL_FMT_CAP_READWRITE cap in libplacebo.) // // Note that setting this option comes at the cost of GLSL support. Readonly // and readwrite images are the same type in GLSL, but SRV and UAV bound // textures are different types in HLSL, so for example, a GLSL function // with an image parameter may fail to compile as HLSL if it's called with a // readonly image and a readwrite image at different call sites. 
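    //
    // Assumed illustration of the resulting HLSL bindings (the exact
    // declarations depend on the SPIRV-Cross version in use): a GLSL
    //     layout(rgba8) readonly  uniform image2D img_in;
    // would come out as a Texture2D bound to a t# register (SRV), while
    //     layout(rgba8) writeonly uniform image2D img_out;
    // would come out as a RWTexture2D bound to a u# register (UAV).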
SC(spvc_compiler_options_set_bool(sc_opts, SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV, SPVC_TRUE)); SC(spvc_compiler_install_compiler_options(sc_comp, sc_opts)); spvc_set active = NULL; SC(spvc_compiler_get_active_interface_variables(sc_comp, &active)); spvc_resources resources = NULL; SC(spvc_compiler_create_shader_resources_for_active_variables( sc_comp, &resources, active)); // Allocate HLSL registers for each resource type alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE, stage); alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_SEPARATE_IMAGE, stage); alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, stage); alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_STORAGE_BUFFER, stage); alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE, stage); if (stage == GLSL_SHADER_COMPUTE) { // Check if the gl_NumWorkGroups builtin is used. If it is, we have to // emulate it with a constant buffer, so allocate it a CBV register. spvc_variable_id num_workgroups_id = spvc_compiler_hlsl_remap_num_workgroups_builtin(sc_comp); if (num_workgroups_id) { pass_p->num_workgroups_used = true; spvc_hlsl_resource_binding binding; spvc_hlsl_resource_binding_init(&binding); binding.stage = stage_to_spv(stage); binding.binding = pass_p->max_binding + 1; // Allocate a CBV register for the buffer binding.cbv.register_binding = pass_s->cbvs.num; PL_ARRAY_APPEND(pass, pass_s->cbvs, HLSL_BINDING_NUM_WORKGROUPS); if (pass_s->cbvs.num > D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) { PL_ERR(gpu, "Not enough constant buffer slots for gl_NumWorkGroups"); goto error; } spvc_compiler_set_decoration(sc_comp, num_workgroups_id, SpvDecorationDescriptorSet, 0); spvc_compiler_set_decoration(sc_comp, num_workgroups_id, SpvDecorationBinding, binding.binding); SC(spvc_compiler_hlsl_add_resource_binding(sc_comp, &binding)); } } SC(spvc_compiler_compile(sc_comp, &hlsl)); clock_t after_spvc = clock(); pl_log_cpu_time(gpu->log, after_glsl, after_spvc, "translating SPIR-V to HLSL"); hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main", get_shader_target(gpu, stage), D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &out, &errors); if (FAILED(hr)) { SAFE_RELEASE(out); PL_ERR(gpu, "D3DCompile failed: %s\n%.*s", pl_hresult_to_str(hr), (int) ID3D10Blob_GetBufferSize(errors), (char *) ID3D10Blob_GetBufferPointer(errors)); goto error; } pl_log_cpu_time(gpu->log, after_spvc, clock(), "translating HLSL to DXBC"); error:; int level = out ? 
PL_LOG_DEBUG : PL_LOG_ERR; PL_MSG(gpu, level, "%s shader GLSL source:", shader_names[stage]); pl_msg_source(gpu->ctx, level, glsl); if (hlsl) { PL_MSG(gpu, level, "%s shader HLSL source:", shader_names[stage]); pl_msg_source(gpu->ctx, level, hlsl); } if (sc) spvc_context_destroy(sc); SAFE_RELEASE(errors); pl_free(tmp); return out; } #define CACHE_MAGIC {'P','L','D','3','D',11} #define CACHE_VERSION 2 static const char d3d11_cache_magic[6] = CACHE_MAGIC; struct d3d11_cache_header { char magic[sizeof(d3d11_cache_magic)]; int cache_version; uint64_t hash; bool num_workgroups_used; int num_main_cbvs; int num_main_srvs; int num_main_samplers; int num_vertex_cbvs; int num_vertex_srvs; int num_vertex_samplers; int num_uavs; size_t vert_bc_len; size_t frag_bc_len; size_t comp_bc_len; }; static inline uint64_t pass_cache_signature(pl_gpu gpu, const struct pl_pass_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); uint64_t hash = p->spirv->signature; unsigned spvc_major, spvc_minor, spvc_patch; spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch); pl_hash_merge(&hash, spvc_major); pl_hash_merge(&hash, spvc_minor); pl_hash_merge(&hash, spvc_patch); pl_hash_merge(&hash, ((uint64_t)p->d3d_compiler_ver.major << 48) | ((uint64_t)p->d3d_compiler_ver.minor << 32) | ((uint64_t)p->d3d_compiler_ver.build << 16) | (uint64_t)p->d3d_compiler_ver.revision); pl_hash_merge(&hash, p->fl); pl_hash_merge(&hash, pl_str_hash(pl_str0(params->glsl_shader))); if (params->type == PL_PASS_RASTER) pl_hash_merge(&hash, pl_str_hash(pl_str0(params->vertex_shader))); return hash; } static inline size_t cache_payload_size(struct d3d11_cache_header *header) { size_t required = (header->num_main_cbvs + header->num_main_srvs + header->num_main_samplers + header->num_vertex_cbvs + header->num_vertex_srvs + header->num_vertex_samplers + header->num_uavs) * sizeof(int) + header->vert_bc_len + header->frag_bc_len + header->comp_bc_len; return required; } static bool d3d11_use_cached_program(pl_gpu gpu, struct pl_pass *pass, const struct pl_pass_params *params, pl_str *vert_bc, pl_str *frag_bc, pl_str *comp_bc) { struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); pl_str cache = { .buf = (uint8_t *) params->cached_program, .len = params->cached_program_len, }; if (cache.len < sizeof(struct d3d11_cache_header)) return false; struct d3d11_cache_header *header = (struct d3d11_cache_header *) cache.buf; cache = pl_str_drop(cache, sizeof(*header)); if (strncmp(header->magic, d3d11_cache_magic, sizeof(d3d11_cache_magic)) != 0) return false; if (header->cache_version != CACHE_VERSION) return false; if (header->hash != pass_cache_signature(gpu, params)) return false; // determine required cache size before reading anything size_t required = cache_payload_size(header); if (cache.len < required) return false; pass_p->num_workgroups_used = header->num_workgroups_used; #define GET_ARRAY(object, name, num_elems) \ do { \ PL_ARRAY_MEMDUP(pass, (object)->name, cache.buf, num_elems); \ cache = pl_str_drop(cache, num_elems * sizeof(*(object)->name.elem)); \ } while (0) #define GET_STAGE_ARRAY(stage, name) \ GET_ARRAY(&pass_p->stage, name, header->num_##stage##_##name) GET_STAGE_ARRAY(main, cbvs); GET_STAGE_ARRAY(main, srvs); GET_STAGE_ARRAY(main, samplers); GET_STAGE_ARRAY(vertex, cbvs); GET_STAGE_ARRAY(vertex, srvs); GET_STAGE_ARRAY(vertex, samplers); GET_ARRAY(pass_p, uavs, header->num_uavs); #define GET_SHADER(ptr) \ do { \ if (ptr) \ *ptr = pl_str_take(cache, header->ptr##_len); \ cache = pl_str_drop(cache, header->ptr##_len); \ } while (0) 
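    // Rough layout of the cached blob parsed here (it mirrors what
    // d3d11_update_program_cache() writes; all sizes come from the header):
    //
    //     struct d3d11_cache_header
    //     int main_cbvs[],   main_srvs[],   main_samplers[]     (binding numbers)
    //     int vertex_cbvs[], vertex_srvs[], vertex_samplers[]   (binding numbers)
    //     int uavs[]                                            (binding numbers)
    //     vert_bc_len bytes of vertex DXBC
    //     frag_bc_len bytes of fragment DXBC
    //     comp_bc_len bytes of compute DXBC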
GET_SHADER(vert_bc); GET_SHADER(frag_bc); GET_SHADER(comp_bc); return true; } static void d3d11_update_program_cache(pl_gpu gpu, struct pl_pass *pass, const pl_str *vs_str, const pl_str *ps_str, const pl_str *cs_str) { struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); struct d3d11_cache_header header = { .magic = CACHE_MAGIC, .cache_version = CACHE_VERSION, .hash = pass_cache_signature(gpu, &pass->params), .num_workgroups_used = pass_p->num_workgroups_used, .num_main_cbvs = pass_p->main.cbvs.num, .num_main_srvs = pass_p->main.srvs.num, .num_main_samplers = pass_p->main.samplers.num, .num_vertex_cbvs = pass_p->vertex.cbvs.num, .num_vertex_srvs = pass_p->vertex.srvs.num, .num_vertex_samplers = pass_p->vertex.samplers.num, .num_uavs = pass_p->uavs.num, .vert_bc_len = vs_str ? vs_str->len : 0, .frag_bc_len = ps_str ? ps_str->len : 0, .comp_bc_len = cs_str ? cs_str->len : 0, }; size_t cache_size = sizeof(header) + cache_payload_size(&header); pl_str cache = {0}; pl_str_append(pass, &cache, (pl_str){ (uint8_t *) &header, sizeof(header) }); #define WRITE_ARRAY(name) pl_str_append(pass, &cache, \ (pl_str){ (uint8_t *) pass_p->name.elem, \ sizeof(*pass_p->name.elem) * pass_p->name.num }) WRITE_ARRAY(main.cbvs); WRITE_ARRAY(main.srvs); WRITE_ARRAY(main.samplers); WRITE_ARRAY(vertex.cbvs); WRITE_ARRAY(vertex.srvs); WRITE_ARRAY(vertex.samplers); WRITE_ARRAY(uavs); if (vs_str) pl_str_append(pass, &cache, *vs_str); if (ps_str) pl_str_append(pass, &cache, *ps_str); if (cs_str) pl_str_append(pass, &cache, *cs_str); pl_assert(cache_size == cache.len); pass->params.cached_program = cache.buf; pass->params.cached_program_len = cache.len; } void pl_d3d11_pass_destroy(pl_gpu gpu, pl_pass pass) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); SAFE_RELEASE(pass_p->vs); SAFE_RELEASE(pass_p->ps); SAFE_RELEASE(pass_p->cs); SAFE_RELEASE(pass_p->layout); SAFE_RELEASE(pass_p->bstate); pl_d3d11_flush_message_queue(ctx, "After pass destroy"); pl_free((void *) pass); } static bool pass_create_raster(pl_gpu gpu, struct pl_pass *pass, const struct pl_pass_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); ID3DBlob *vs_blob = NULL; pl_str vs_str = {0}; ID3DBlob *ps_blob = NULL; pl_str ps_str = {0}; D3D11_INPUT_ELEMENT_DESC *in_descs = NULL; bool success = false; if (d3d11_use_cached_program(gpu, pass, params, &vs_str, &ps_str, NULL)) PL_DEBUG(gpu, "Using cached DXBC shaders"); pl_assert((vs_str.len == 0) == (ps_str.len == 0)); if (vs_str.len == 0) { vs_blob = shader_compile_glsl(gpu, pass, &pass_p->vertex, GLSL_SHADER_VERTEX, params->vertex_shader); if (!vs_blob) goto error; vs_str = (pl_str) { .buf = ID3D10Blob_GetBufferPointer(vs_blob), .len = ID3D10Blob_GetBufferSize(vs_blob), }; ps_blob = shader_compile_glsl(gpu, pass, &pass_p->main, GLSL_SHADER_FRAGMENT, params->glsl_shader); if (!ps_blob) goto error; ps_str = (pl_str) { .buf = ID3D10Blob_GetBufferPointer(ps_blob), .len = ID3D10Blob_GetBufferSize(ps_blob), }; } D3D(ID3D11Device_CreateVertexShader(p->dev, vs_str.buf, vs_str.len, NULL, &pass_p->vs)); D3D(ID3D11Device_CreatePixelShader(p->dev, ps_str.buf, ps_str.len, NULL, &pass_p->ps)); in_descs = pl_calloc_ptr(pass, params->num_vertex_attribs, in_descs); for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_vertex_attrib *va = ¶ms->vertex_attribs[i]; in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) { // The semantic name doesn't mean much and is just used to verify // 
the input description matches the shader. SPIRV-Cross always // uses TEXCOORD, so we should too. .SemanticName = "TEXCOORD", .SemanticIndex = va->location, .AlignedByteOffset = va->offset, .Format = fmt_to_dxgi(va->fmt), }; } D3D(ID3D11Device_CreateInputLayout(p->dev, in_descs, params->num_vertex_attribs, vs_str.buf, vs_str.len, &pass_p->layout)); static const D3D11_BLEND blend_options[] = { [PL_BLEND_ZERO] = D3D11_BLEND_ZERO, [PL_BLEND_ONE] = D3D11_BLEND_ONE, [PL_BLEND_SRC_ALPHA] = D3D11_BLEND_SRC_ALPHA, [PL_BLEND_ONE_MINUS_SRC_ALPHA] = D3D11_BLEND_INV_SRC_ALPHA, }; D3D11_BLEND_DESC bdesc = { .RenderTarget[0] = { .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, }, }; if (params->blend_params) { bdesc.RenderTarget[0] = (D3D11_RENDER_TARGET_BLEND_DESC) { .BlendEnable = TRUE, .SrcBlend = blend_options[params->blend_params->src_rgb], .DestBlend = blend_options[params->blend_params->dst_rgb], .BlendOp = D3D11_BLEND_OP_ADD, .SrcBlendAlpha = blend_options[params->blend_params->src_alpha], .DestBlendAlpha = blend_options[params->blend_params->dst_alpha], .BlendOpAlpha = D3D11_BLEND_OP_ADD, .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, }; } D3D(ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate)); d3d11_update_program_cache(gpu, pass, &vs_str, &ps_str, NULL); success = true; error: SAFE_RELEASE(vs_blob); SAFE_RELEASE(ps_blob); pl_free(in_descs); return success; } static bool pass_create_compute(pl_gpu gpu, struct pl_pass *pass, const struct pl_pass_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); ID3DBlob *cs_blob = NULL; pl_str cs_str = {0}; bool success = false; if (d3d11_use_cached_program(gpu, pass, params, NULL, NULL, &cs_str)) PL_DEBUG(gpu, "Using cached DXBC shader"); if (cs_str.len == 0) { cs_blob = shader_compile_glsl(gpu, pass, &pass_p->main, GLSL_SHADER_COMPUTE, params->glsl_shader); if (!cs_blob) goto error; cs_str = (pl_str) { .buf = ID3D10Blob_GetBufferPointer(cs_blob), .len = ID3D10Blob_GetBufferSize(cs_blob), }; } D3D(ID3D11Device_CreateComputeShader(p->dev, cs_str.buf, cs_str.len, NULL, &pass_p->cs)); if (pass_p->num_workgroups_used) { D3D11_BUFFER_DESC bdesc = { .BindFlags = D3D11_BIND_CONSTANT_BUFFER, .ByteWidth = sizeof(pass_p->last_num_wgs), }; D3D(ID3D11Device_CreateBuffer(p->dev, &bdesc, NULL, &pass_p->num_workgroups_buf)); } d3d11_update_program_cache(gpu, pass, NULL, NULL, &cs_str); success = true; error: SAFE_RELEASE(cs_blob); return success; } const struct pl_pass *pl_d3d11_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_pass *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_d3d11); pass->params = pl_pass_params_copy(pass, params); struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); *pass_p = (struct pl_pass_d3d11) { .max_binding = -1, }; if (params->type == PL_PASS_COMPUTE) { if (!pass_create_compute(gpu, pass, params)) goto error; } else { if (!pass_create_raster(gpu, pass, params)) goto error; } // Pre-allocate resource arrays to use in pl_pass_run pass_p->cbv_arr = pl_calloc(pass, PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num), sizeof(*pass_p->cbv_arr)); pass_p->srv_arr = pl_calloc(pass, PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num), sizeof(*pass_p->srv_arr)); pass_p->sampler_arr = pl_calloc(pass, PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num), sizeof(*pass_p->sampler_arr)); pass_p->uav_arr = pl_calloc(pass, pass_p->uavs.num, 
sizeof(*pass_p->uav_arr)); // Find the highest binding number used in `params->descriptors` if we // haven't found it already. (If the shader was compiled fresh rather than // loaded from cache, `pass_p->max_binding` should already be set.) if (pass_p->max_binding == -1) { for (int i = 0; i < params->num_descriptors; i++) { pass_p->max_binding = PL_MAX(pass_p->max_binding, params->descriptors[i].binding); } } // Build a mapping from binding numbers to descriptor array indexes int *binding_map = pl_calloc_ptr(pass, pass_p->max_binding + 1, binding_map); for (int i = 0; i <= pass_p->max_binding; i++) binding_map[i] = HLSL_BINDING_NOT_USED; for (int i = 0; i < params->num_descriptors; i++) binding_map[params->descriptors[i].binding] = i; #define MAP_RESOURCES(array) \ do { \ for (int i = 0; i < array.num; i++) { \ if (array.elem[i] > pass_p->max_binding) { \ array.elem[i] = HLSL_BINDING_NOT_USED; \ } else if (array.elem[i] >= 0) { \ array.elem[i] = binding_map[array.elem[i]]; \ } \ } \ } while (0) // During shader compilation (or after loading a compiled shader from cache) // the entries of the following resource lists are shader binding numbers, // however, it's more efficient for `pl_pass_run` if they refer to indexes // of the `params->descriptors` array instead, so remap them here MAP_RESOURCES(pass_p->main.cbvs); MAP_RESOURCES(pass_p->main.samplers); MAP_RESOURCES(pass_p->main.srvs); MAP_RESOURCES(pass_p->vertex.cbvs); MAP_RESOURCES(pass_p->vertex.samplers); MAP_RESOURCES(pass_p->vertex.srvs); MAP_RESOURCES(pass_p->uavs); pl_free(binding_map); pl_d3d11_flush_message_queue(ctx, "After pass create"); return pass; error: pl_d3d11_pass_destroy(gpu, pass); return NULL; } // Shared logic between VS, PS and CS for filling the resource arrays that are // passed to ID3D11DeviceContext methods static void fill_resources(pl_gpu gpu, pl_pass pass, struct d3d_pass_stage *pass_s, const struct pl_pass_run_params *params, ID3D11Buffer **cbvs, ID3D11ShaderResourceView **srvs, ID3D11SamplerState **samplers) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); for (int i = 0; i < pass_s->cbvs.num; i++) { int binding = pass_s->cbvs.elem[i]; if (binding == HLSL_BINDING_NUM_WORKGROUPS) { cbvs[i] = pass_p->num_workgroups_buf; continue; } else if (binding < 0) { cbvs[i] = NULL; continue; } pl_buf buf = params->desc_bindings[binding].object; pl_d3d11_buf_resolve(gpu, buf); struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); cbvs[i] = buf_p->buf; } for (int i = 0; i < pass_s->srvs.num; i++) { int binding = pass_s->srvs.elem[i]; if (binding < 0) { srvs[i] = NULL; continue; } pl_tex tex; struct pl_tex_d3d11 *tex_p; pl_buf buf; struct pl_buf_d3d11 *buf_p; switch (pass->params.descriptors[binding].type) { case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: tex = params->desc_bindings[binding].object; tex_p = PL_PRIV(tex); srvs[i] = tex_p->srv; break; case PL_DESC_BUF_STORAGE: buf = params->desc_bindings[binding].object; buf_p = PL_PRIV(buf); srvs[i] = buf_p->raw_srv; break; case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: buf = params->desc_bindings[binding].object; buf_p = PL_PRIV(buf); srvs[i] = buf_p->texel_srv; break; default: break; } } for (int i = 0; i < pass_s->samplers.num; i++) { int binding = pass_s->samplers.elem[i]; if (binding < 0) { samplers[i] = NULL; continue; } struct pl_desc_binding *db = ¶ms->desc_bindings[binding]; samplers[i] = p->samplers[db->sample_mode][db->address_mode]; } } static void fill_uavs(pl_pass pass, const struct pl_pass_run_params *params, 
ID3D11UnorderedAccessView **uavs) { struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); for (int i = 0; i < pass_p->uavs.num; i++) { int binding = pass_p->uavs.elem[i]; if (binding < 0) { uavs[i] = NULL; continue; } pl_tex tex; struct pl_tex_d3d11 *tex_p; pl_buf buf; struct pl_buf_d3d11 *buf_p; switch (pass->params.descriptors[binding].type) { case PL_DESC_BUF_STORAGE: buf = params->desc_bindings[binding].object; buf_p = PL_PRIV(buf); uavs[i] = buf_p->raw_uav; break; case PL_DESC_STORAGE_IMG: tex = params->desc_bindings[binding].object; tex_p = PL_PRIV(tex); uavs[i] = tex_p->uav; break; case PL_DESC_BUF_TEXEL_STORAGE: buf = params->desc_bindings[binding].object; buf_p = PL_PRIV(buf); uavs[i] = buf_p->texel_uav; break; default: break; } } } static void pass_run_raster(pl_gpu gpu, const struct pl_pass_run_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); pl_pass pass = params->pass; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); if (p->fl <= D3D_FEATURE_LEVEL_9_3 && params->index_buf) { // Index buffers are unsupported because we can't tell if they are an // index buffer or a vertex buffer on creation, and FL9_x allows only // one binding type per-buffer PL_ERR(gpu, "Index buffers are unsupported in FL9_x"); return; } if (p->fl <= D3D_FEATURE_LEVEL_9_1 && params->index_data && params->index_fmt != PL_INDEX_UINT16) { PL_ERR(gpu, "32-bit index format is unsupported in FL9_1"); return; } // Figure out how much vertex/index data to upload, if any size_t vertex_alloc = params->vertex_data ? pl_vertex_buf_size(params) : 0; size_t index_alloc = params->index_data ? pl_index_buf_size(params) : 0; static const DXGI_FORMAT index_fmts[PL_INDEX_FORMAT_COUNT] = { [PL_INDEX_UINT16] = DXGI_FORMAT_R16_UINT, [PL_INDEX_UINT32] = DXGI_FORMAT_R32_UINT, }; // Upload vertex data. On >=FL10_0 we use the same buffer for index data, so // upload that too. bool share_vertex_index_buf = p->fl > D3D_FEATURE_LEVEL_9_3; if (vertex_alloc || (share_vertex_index_buf && index_alloc)) { struct stream_buf_slice slices[] = { { .data = params->vertex_data, .size = vertex_alloc }, { .data = params->index_data, .size = index_alloc }, }; if (!stream_buf_upload(gpu, &p->vbuf, slices, share_vertex_index_buf ? 
2 : 1)) { PL_ERR(gpu, "Failed to upload vertex data"); return; } if (vertex_alloc) { ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &p->vbuf.buf, &(UINT) { pass->params.vertex_stride }, &slices[0].offset); } if (share_vertex_index_buf && index_alloc) { ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->vbuf.buf, index_fmts[params->index_fmt], slices[1].offset); } } // Upload index data for <=FL9_3, which must be in its own buffer if (!share_vertex_index_buf && index_alloc) { struct stream_buf_slice slices[] = { { .data = params->index_data, .size = index_alloc }, }; if (!stream_buf_upload(gpu, &p->ibuf, slices, PL_ARRAY_SIZE(slices))) { PL_ERR(gpu, "Failed to upload index data"); return; } ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->ibuf.buf, index_fmts[params->index_fmt], slices[0].offset); } if (params->vertex_buf) { struct pl_buf_d3d11 *buf_p = PL_PRIV(params->vertex_buf); ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &buf_p->buf, &(UINT) { pass->params.vertex_stride }, &(UINT) { params->buf_offset }); } if (params->index_buf) { struct pl_buf_d3d11 *buf_p = PL_PRIV(params->index_buf); ID3D11DeviceContext_IASetIndexBuffer(p->imm, buf_p->buf, index_fmts[params->index_fmt], params->index_offset); } ID3D11DeviceContext_IASetInputLayout(p->imm, pass_p->layout); static const D3D_PRIMITIVE_TOPOLOGY prim_topology[] = { [PL_PRIM_TRIANGLE_LIST] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, [PL_PRIM_TRIANGLE_STRIP] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, }; ID3D11DeviceContext_IASetPrimitiveTopology(p->imm, prim_topology[pass->params.vertex_type]); ID3D11DeviceContext_VSSetShader(p->imm, pass_p->vs, NULL, 0); ID3D11Buffer **cbvs = pass_p->cbv_arr; ID3D11ShaderResourceView **srvs = pass_p->srv_arr; ID3D11SamplerState **samplers = pass_p->sampler_arr; ID3D11UnorderedAccessView **uavs = pass_p->uav_arr; // Set vertex shader resources. The device context is called conditionally // because the debug layer complains if these are called with 0 resources. 
fill_resources(gpu, pass, &pass_p->vertex, params, cbvs, srvs, samplers); if (pass_p->vertex.cbvs.num) ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs); if (pass_p->vertex.srvs.num) ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs); if (pass_p->vertex.samplers.num) ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers); ID3D11DeviceContext_RSSetState(p->imm, p->rstate); ID3D11DeviceContext_RSSetViewports(p->imm, 1, (&(D3D11_VIEWPORT) { .TopLeftX = params->viewport.x0, .TopLeftY = params->viewport.y0, .Width = pl_rect_w(params->viewport), .Height = pl_rect_h(params->viewport), .MinDepth = 0, .MaxDepth = 1, })); ID3D11DeviceContext_RSSetScissorRects(p->imm, 1, (&(D3D11_RECT) { .left = params->scissors.x0, .top = params->scissors.y0, .right = params->scissors.x1, .bottom = params->scissors.y1, })); ID3D11DeviceContext_PSSetShader(p->imm, pass_p->ps, NULL, 0); // Set pixel shader resources fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers); if (pass_p->main.cbvs.num) ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); if (pass_p->main.srvs.num) ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); if (pass_p->main.samplers.num) ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); ID3D11DeviceContext_OMSetBlendState(p->imm, pass_p->bstate, NULL, D3D11_DEFAULT_SAMPLE_MASK); ID3D11DeviceContext_OMSetDepthStencilState(p->imm, p->dsstate, 0); fill_uavs(pass, params, uavs); struct pl_tex_d3d11 *target_p = PL_PRIV(params->target); ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews( p->imm, 1, &target_p->rtv, NULL, 1, pass_p->uavs.num, uavs, NULL); if (params->index_data || params->index_buf) { ID3D11DeviceContext_DrawIndexed(p->imm, params->vertex_count, 0, 0); } else { ID3D11DeviceContext_Draw(p->imm, params->vertex_count, 0); } // Unbind everything. It's easier to do this than to actually track state, // and if we leave the RTV bound, it could trip up D3D's conflict checker. // Also, apparently unbinding SRVs can prevent a 10level9 bug? 
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-devices-downlevel-prevent-null-srvs for (int i = 0; i < PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num); i++) cbvs[i] = NULL; for (int i = 0; i < PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num); i++) srvs[i] = NULL; for (int i = 0; i < PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num); i++) samplers[i] = NULL; for (int i = 0; i < pass_p->uavs.num; i++) uavs[i] = NULL; if (pass_p->vertex.cbvs.num) ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs); if (pass_p->vertex.srvs.num) ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs); if (pass_p->vertex.samplers.num) ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers); if (pass_p->main.cbvs.num) ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); if (pass_p->main.srvs.num) ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); if (pass_p->main.samplers.num) ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews( p->imm, 0, NULL, NULL, 1, pass_p->uavs.num, uavs, NULL); } static void pass_run_compute(pl_gpu gpu, const struct pl_pass_run_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); pl_pass pass = params->pass; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); // Update gl_NumWorkGroups emulation buffer if necessary if (pass_p->num_workgroups_used) { bool needs_update = false; for (int i = 0; i < 3; i++) { if (pass_p->last_num_wgs.num_wgs[i] != params->compute_groups[i]) needs_update = true; pass_p->last_num_wgs.num_wgs[i] = params->compute_groups[i]; } if (needs_update) { ID3D11DeviceContext_UpdateSubresource(p->imm, (ID3D11Resource *) pass_p->num_workgroups_buf, 0, NULL, &pass_p->last_num_wgs, 0, 0); } } ID3D11DeviceContext_CSSetShader(p->imm, pass_p->cs, NULL, 0); ID3D11Buffer **cbvs = pass_p->cbv_arr; ID3D11ShaderResourceView **srvs = pass_p->srv_arr; ID3D11UnorderedAccessView **uavs = pass_p->uav_arr; ID3D11SamplerState **samplers = pass_p->sampler_arr; fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers); fill_uavs(pass, params, uavs); if (pass_p->main.cbvs.num) ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); if (pass_p->main.srvs.num) ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); if (pass_p->main.samplers.num) ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); if (pass_p->uavs.num) ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, pass_p->uavs.num, uavs, NULL); ID3D11DeviceContext_Dispatch(p->imm, params->compute_groups[0], params->compute_groups[1], params->compute_groups[2]); // Unbind everything for (int i = 0; i < pass_p->main.cbvs.num; i++) cbvs[i] = NULL; for (int i = 0; i < pass_p->main.srvs.num; i++) srvs[i] = NULL; for (int i = 0; i < pass_p->main.samplers.num; i++) samplers[i] = NULL; for (int i = 0; i < pass_p->uavs.num; i++) uavs[i] = NULL; if (pass_p->main.cbvs.num) ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); if (pass_p->main.srvs.num) ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); if (pass_p->main.samplers.num) ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); if (pass_p->uavs.num) ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, 
pass_p->uavs.num, uavs, NULL); } void pl_d3d11_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; pl_pass pass = params->pass; pl_d3d11_timer_start(gpu, params->timer); if (pass->params.type == PL_PASS_COMPUTE) { pass_run_compute(gpu, params); } else { pass_run_raster(gpu, params); } pl_d3d11_timer_end(gpu, params->timer); pl_d3d11_flush_message_queue(ctx, "After pass run"); } libplacebo-v4.192.1/src/d3d11/gpu_tex.c000066400000000000000000000543731417677245700174340ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "formats.h" static inline UINT tex_subresource(pl_tex tex) { struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); return tex_p->array_slice >= 0 ? tex_p->array_slice : 0; } static bool tex_init(pl_gpu gpu, pl_tex tex) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); // View formats may be omitted when they match the texture format, but for // simplicity's sake we always set it. It will match the texture format for // textures created with tex_create, but it can be different for video // textures wrapped with pl_d3d11_wrap. 
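    //
    // Hypothetical example of the wrapped case: a hardware decoder may hand
    // us an NV12 ID3D11Texture2D, which can be wrapped once per plane via
    // pl_d3d11_wrap(), with view formats (e.g. DXGI_FORMAT_R8_UNORM for luma,
    // DXGI_FORMAT_R8G8_UNORM for chroma) that differ from the underlying
    // DXGI_FORMAT_NV12 resource format.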
DXGI_FORMAT fmt = fmt_to_dxgi(tex->params.format); if (tex->params.sampleable || tex->params.storable) { D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = { .Format = fmt, }; switch (pl_tex_params_dimension(tex->params)) { case 1: if (tex_p->array_slice >= 0) { srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; srvdesc.Texture1DArray.MipLevels = 1; srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; srvdesc.Texture1DArray.ArraySize = 1; } else { srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; srvdesc.Texture1D.MipLevels = 1; } break; case 2: if (tex_p->array_slice >= 0) { srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; srvdesc.Texture2DArray.MipLevels = 1; srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; srvdesc.Texture2DArray.ArraySize = 1; } else { srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; srvdesc.Texture2D.MipLevels = 1; } break; case 3: // D3D11 does not have Texture3D arrays srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; srvdesc.Texture3D.MipLevels = 1; break; } D3D(ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc, &tex_p->srv)); } if (tex->params.renderable) { D3D11_RENDER_TARGET_VIEW_DESC rtvdesc = { .Format = fmt, }; switch (pl_tex_params_dimension(tex->params)) { case 1: if (tex_p->array_slice >= 0) { rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1DARRAY; rtvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; rtvdesc.Texture1DArray.ArraySize = 1; } else { rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1D; } break; case 2: if (tex_p->array_slice >= 0) { rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DARRAY; rtvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; rtvdesc.Texture2DArray.ArraySize = 1; } else { rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; } break; case 3: // D3D11 does not have Texture3D arrays rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE3D; rtvdesc.Texture3D.WSize = -1; break; } D3D(ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, &rtvdesc, &tex_p->rtv)); } if (p->fl >= D3D_FEATURE_LEVEL_11_0 && tex->params.storable) { D3D11_UNORDERED_ACCESS_VIEW_DESC uavdesc = { .Format = fmt, }; switch (pl_tex_params_dimension(tex->params)) { case 1: if (tex_p->array_slice >= 0) { uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1DARRAY; uavdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; uavdesc.Texture1DArray.ArraySize = 1; } else { uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1D; } break; case 2: if (tex_p->array_slice >= 0) { uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2DARRAY; uavdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; uavdesc.Texture2DArray.ArraySize = 1; } else { uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; } break; case 3: // D3D11 does not have Texture3D arrays uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE3D; uavdesc.Texture3D.WSize = -1; break; } D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, &uavdesc, &tex_p->uav)); } return true; error: return false; } void pl_d3d11_tex_destroy(pl_gpu gpu, pl_tex tex) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); SAFE_RELEASE(tex_p->srv); SAFE_RELEASE(tex_p->rtv); SAFE_RELEASE(tex_p->uav); SAFE_RELEASE(tex_p->res); SAFE_RELEASE(tex_p->staging); pl_d3d11_flush_message_queue(ctx, "After texture destroy"); pl_free((void *) tex); } pl_tex pl_d3d11_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = 
p->ctx; struct pl_tex *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_d3d11); tex->params = *params; tex->params.initial_data = NULL; tex->sampler_type = PL_SAMPLER_NORMAL; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); DXGI_FORMAT dxfmt = fmt_to_dxgi(params->format); D3D11_USAGE usage = D3D11_USAGE_DEFAULT; D3D11_BIND_FLAG bind_flags = 0; if (p->fl >= D3D_FEATURE_LEVEL_11_0) { // On >=FL11_0, blit emulation needs image storage tex->params.storable |= params->blit_src || params->blit_dst; // Blit emulation can use a sampler for linear filtering during stretch if ((tex->params.format->caps & PL_FMT_CAP_LINEAR) && params->blit_src) tex->params.sampleable = true; } else { // On params.sampleable |= params->blit_src; tex->params.renderable |= params->blit_dst; } if (tex->params.sampleable) bind_flags |= D3D11_BIND_SHADER_RESOURCE; if (tex->params.renderable) bind_flags |= D3D11_BIND_RENDER_TARGET; if (p->fl >= D3D_FEATURE_LEVEL_11_0 && tex->params.storable) bind_flags |= D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; // Apparently IMMUTABLE textures are efficient, so try to infer whether we // can use one if (params->initial_data && !tex->params.renderable && !tex->params.storable && !params->host_writable) usage = D3D11_USAGE_IMMUTABLE; // In FL9_x, resources with only D3D11_BIND_SHADER_RESOURCE can't be copied // from GPU-accessible memory to CPU-accessible memory. The only other bind // flag we set on this FL is D3D11_BIND_RENDER_TARGET, so set it. if (p->fl <= D3D_FEATURE_LEVEL_9_3 && tex->params.host_readable) bind_flags |= D3D11_BIND_RENDER_TARGET; // In FL9_x, when using DEFAULT or IMMUTABLE, BindFlags cannot be zero if (p->fl <= D3D_FEATURE_LEVEL_9_3 && !bind_flags) bind_flags |= D3D11_BIND_SHADER_RESOURCE; D3D11_SUBRESOURCE_DATA data; D3D11_SUBRESOURCE_DATA *pdata = NULL; if (params->initial_data) { data = (D3D11_SUBRESOURCE_DATA) { .pSysMem = params->initial_data, .SysMemPitch = params->w * params->format->texel_size, }; if (params->d) data.SysMemSlicePitch = data.SysMemPitch * params->h; pdata = &data; } switch (pl_tex_params_dimension(*params)) { case 1:; D3D11_TEXTURE1D_DESC desc1d = { .Width = params->w, .MipLevels = 1, .ArraySize = 1, .Format = dxfmt, .Usage = usage, .BindFlags = bind_flags, }; D3D(ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d)); tex_p->res = (ID3D11Resource *)tex_p->tex1d; // Create a staging texture with CPU access for pl_tex_download() if (params->host_readable) { desc1d.BindFlags = 0; desc1d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc1d.Usage = D3D11_USAGE_STAGING; D3D(ID3D11Device_CreateTexture1D(p->dev, &desc1d, NULL, &tex_p->staging1d)); tex_p->staging = (ID3D11Resource *) tex_p->staging1d; } break; case 2:; D3D11_TEXTURE2D_DESC desc2d = { .Width = params->w, .Height = params->h, .MipLevels = 1, .ArraySize = 1, .SampleDesc.Count = 1, .Format = dxfmt, .Usage = usage, .BindFlags = bind_flags, }; D3D(ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d)); tex_p->res = (ID3D11Resource *)tex_p->tex2d; // Create a staging texture with CPU access for pl_tex_download() if (params->host_readable) { desc2d.BindFlags = 0; desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc2d.Usage = D3D11_USAGE_STAGING; D3D(ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL, &tex_p->staging2d)); tex_p->staging = (ID3D11Resource *) tex_p->staging2d; } break; case 3:; D3D11_TEXTURE3D_DESC desc3d = { .Width = params->w, .Height = params->h, .Depth = params->d, .MipLevels = 1, .Format = dxfmt, .Usage = usage, .BindFlags = bind_flags, }; 
D3D(ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d)); tex_p->res = (ID3D11Resource *)tex_p->tex3d; // Create a staging texture with CPU access for pl_tex_download() if (params->host_readable) { desc3d.BindFlags = 0; desc3d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc3d.Usage = D3D11_USAGE_STAGING; D3D(ID3D11Device_CreateTexture3D(p->dev, &desc3d, NULL, &tex_p->staging3d)); tex_p->staging = (ID3D11Resource *) tex_p->staging3d; } break; default: pl_unreachable(); } tex_p->array_slice = -1; if (!tex_init(gpu, tex)) goto error; pl_d3d11_flush_message_queue(ctx, "After texture create"); return tex; error: pl_d3d11_tex_destroy(gpu, tex); return NULL; } pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_d3d11); tex->sampler_type = PL_SAMPLER_NORMAL; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN; D3D11_USAGE usage = D3D11_USAGE_DEFAULT; D3D11_BIND_FLAG bind_flags = 0; UINT mip_levels = 1; UINT array_size = 1; UINT sample_count = 1; D3D11_RESOURCE_DIMENSION type; ID3D11Resource_GetType(params->tex, &type); switch (type) { case D3D11_RESOURCE_DIMENSION_TEXTURE1D: D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture1D, (void **) &tex_p->tex1d)); tex_p->res = (ID3D11Resource *) tex_p->tex1d; D3D11_TEXTURE1D_DESC desc1d; ID3D11Texture1D_GetDesc(tex_p->tex1d, &desc1d); tex->params.w = desc1d.Width; mip_levels = desc1d.MipLevels; array_size = desc1d.ArraySize; fmt = desc1d.Format; usage = desc1d.Usage; bind_flags = desc1d.BindFlags; break; case D3D11_RESOURCE_DIMENSION_TEXTURE2D: D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture2D, (void **) &tex_p->tex2d)); tex_p->res = (ID3D11Resource *) tex_p->tex2d; D3D11_TEXTURE2D_DESC desc2d; ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); tex->params.w = desc2d.Width; tex->params.h = desc2d.Height; mip_levels = desc2d.MipLevels; array_size = desc2d.ArraySize; fmt = desc2d.Format; sample_count = desc2d.SampleDesc.Count; usage = desc2d.Usage; bind_flags = desc2d.BindFlags; // Allow the format and size of 2D textures to be overridden to support // shader views of video resources if (params->fmt) { fmt = params->fmt; tex->params.w = params->w; tex->params.h = params->h; } break; case D3D11_RESOURCE_DIMENSION_TEXTURE3D: D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture3D, (void **) &tex_p->tex3d)); tex_p->res = (ID3D11Resource *) tex_p->tex3d; D3D11_TEXTURE3D_DESC desc3d; ID3D11Texture3D_GetDesc(tex_p->tex3d, &desc3d); tex->params.w = desc3d.Width; tex->params.h = desc3d.Height; tex->params.d = desc3d.Depth; mip_levels = desc3d.MipLevels; fmt = desc3d.Format; usage = desc3d.Usage; bind_flags = desc3d.BindFlags; break; case D3D11_RESOURCE_DIMENSION_UNKNOWN: case D3D11_RESOURCE_DIMENSION_BUFFER: PL_ERR(gpu, "Resource is not suitable to wrap"); goto error; } if (mip_levels != 1) { PL_ERR(gpu, "Mipmapped textures not supported for wrapping"); goto error; } if (sample_count != 1) { PL_ERR(gpu, "Multisampled textures not supported for wrapping"); goto error; } if (usage != D3D11_USAGE_DEFAULT) { PL_ERR(gpu, "Resource is not D3D11_USAGE_DEFAULT"); goto error; } if (array_size > 1) { if (params->array_slice < 0 || params->array_slice >= array_size) { PL_ERR(gpu, "array_slice out of range"); goto error; } tex_p->array_slice = params->array_slice; } else { tex_p->array_slice = -1; } if (bind_flags & 
D3D11_BIND_SHADER_RESOURCE) { tex->params.sampleable = true; // Blit emulation uses a render pass on <FL11_0 if (p->fl < D3D_FEATURE_LEVEL_11_0) tex->params.blit_src = true; } if (bind_flags & D3D11_BIND_RENDER_TARGET) { tex->params.renderable = true; // Blit emulation uses a render pass on <FL11_0 if (p->fl < D3D_FEATURE_LEVEL_11_0) tex->params.blit_dst = true; } static const D3D11_BIND_FLAG storable_flags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; if ((bind_flags & storable_flags) == storable_flags) { tex->params.storable = true; // Blit emulation uses image storage on >=FL11_0. A feature level check // isn't required because <FL11_0 doesn't have storable textures. tex->params.blit_src = tex->params.blit_dst = true; } for (int i = 0; i < gpu->num_formats; i++) { DXGI_FORMAT target_fmt = fmt_to_dxgi(gpu->formats[i]); if (fmt == target_fmt) { tex->params.format = gpu->formats[i]; break; } } if (!tex->params.format) { PL_ERR(gpu, "Could not find a suitable pl_fmt for wrapped resource"); goto error; } if (!tex_init(gpu, tex)) goto error; pl_d3d11_flush_message_queue(ctx, "After texture wrap"); return tex; error: pl_d3d11_tex_destroy(gpu, tex); return NULL; } void pl_d3d11_tex_invalidate(pl_gpu gpu, pl_tex tex) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); // Resource discarding requires D3D11.1 if (!p->imm1) return; // Prefer discarding a view to discarding the whole resource. The reason // for this is that a pl_tex can refer to a single member of a texture // array. Discarding the SRV, RTV or UAV should only discard that member. if (tex_p->rtv) { ID3D11DeviceContext1_DiscardView(p->imm1, (ID3D11View *) tex_p->rtv); } else if (tex_p->uav) { ID3D11DeviceContext1_DiscardView(p->imm1, (ID3D11View *) tex_p->uav); } else if (tex_p->srv) { ID3D11DeviceContext1_DiscardView(p->imm1, (ID3D11View *) tex_p->srv); } else if (tex_p->array_slice < 0) { // If there are no views, only discard if the ID3D11Resource is not a // texture array ID3D11DeviceContext1_DiscardResource(p->imm1, tex_p->res); } pl_d3d11_flush_message_queue(ctx, "After texture invalidate"); } void pl_d3d11_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); if (tex->params.format->type == PL_FMT_UINT) { if (tex_p->uav) { ID3D11DeviceContext_ClearUnorderedAccessViewUint(p->imm, tex_p->uav, color.u); } else { float c[4] = { color.u[0], color.u[1], color.u[2], color.u[3] }; ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, c); } } else if (tex->params.format->type == PL_FMT_SINT) { if (tex_p->uav) { ID3D11DeviceContext_ClearUnorderedAccessViewUint(p->imm, tex_p->uav, (const uint32_t *)color.i); } else { float c[4] = { color.i[0], color.i[1], color.i[2], color.i[3] }; ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, c); } } else if (tex_p->rtv) { ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, color.f); } else { ID3D11DeviceContext_ClearUnorderedAccessViewFloat(p->imm, tex_p->uav, color.f); } pl_d3d11_flush_message_queue(ctx, "After texture clear"); } #define pl_rect3d_to_box(rc) \ ((D3D11_BOX) { \ .left = rc.x0, .top = rc.y0, .front = rc.z0, \ .right = rc.x1, .bottom = rc.y1, .back = rc.z1, \ }) void pl_d3d11_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *src_p = PL_PRIV(params->src); DXGI_FORMAT src_fmt = fmt_to_dxgi(params->src->params.format);
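/* Editor's usage sketch (assumed caller-side code, not part of this file):
 * swapping the y coordinates of one rect expresses a vertical flip, which
 * forces the shader-based paths below instead of CopySubresourceRegion:
 *
 *     pl_tex_blit(gpu, &(struct pl_tex_blit_params) {
 *         .src    = src_tex,                  // hypothetical textures
 *         .dst    = dst_tex,
 *         .src_rc = { 0, 0, 0, w, h, 1 },     // x0, y0, z0, x1, y1, z1
 *         .dst_rc = { 0, h, 0, w, 0, 1 },     // flipped in y
 *     });
 */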
struct pl_tex_d3d11 *dst_p = PL_PRIV(params->dst); DXGI_FORMAT dst_fmt = fmt_to_dxgi(params->dst->params.format); // If the blit operation doesn't require flipping, scaling or format // conversion, we can use CopySubresourceRegion struct pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc; if (pl_rect3d_eq(src_rc, dst_rc) && src_fmt == dst_fmt) { struct pl_rect3d rc = params->src_rc; pl_rect3d_normalize(&rc); ID3D11DeviceContext_CopySubresourceRegion(p->imm, dst_p->res, tex_subresource(params->dst), rc.x0, rc.y0, rc.z0, src_p->res, tex_subresource(params->src), &pl_rect3d_to_box(rc)); } else if (p->fl >= D3D_FEATURE_LEVEL_11_0) { if (!pl_tex_blit_compute(gpu, p->dp, params)) PL_ERR(gpu, "Failed compute shader fallback blit"); } else { pl_tex_blit_raster(gpu, p->dp, params); } pl_d3d11_flush_message_queue(ctx, "After texture blit"); } bool pl_d3d11_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; pl_tex tex = params->tex; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); pl_d3d11_timer_start(gpu, params->timer); ID3D11DeviceContext_UpdateSubresource(p->imm, tex_p->res, tex_subresource(tex), &pl_rect3d_to_box(params->rc), params->ptr, params->row_pitch, params->depth_pitch); pl_d3d11_timer_end(gpu, params->timer); pl_d3d11_flush_message_queue(ctx, "After texture upload"); return true; } bool pl_d3d11_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; const struct pl_tex *tex = params->tex; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); if (!tex_p->staging) return false; pl_d3d11_timer_start(gpu, params->timer); ID3D11DeviceContext_CopySubresourceRegion(p->imm, (ID3D11Resource *) tex_p->staging, 0, params->rc.x0, params->rc.y0, params->rc.z0, tex_p->res, tex_subresource(tex), &pl_rect3d_to_box(params->rc)); D3D11_MAPPED_SUBRESOURCE lock; D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) tex_p->staging, 0, D3D11_MAP_READ, 0, &lock)); char *cdst = params->ptr; char *csrc = lock.pData; size_t line_size = pl_rect_w(params->rc) * tex->params.format->texel_size; for (int z = 0; z < pl_rect_d(params->rc); z++) { for (int y = 0; y < pl_rect_h(params->rc); y++) { memcpy(cdst + z * params->depth_pitch + y * params->row_pitch, csrc + (params->rc.z0 + z) * lock.DepthPitch + (params->rc.y0 + y) * lock.RowPitch + params->rc.x0, line_size); } } ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource*)tex_p->staging, 0); pl_d3d11_timer_end(gpu, params->timer); pl_d3d11_flush_message_queue(ctx, "After texture download"); return true; error: return false; } libplacebo-v4.192.1/src/d3d11/swapchain.c000066400000000000000000000270351417677245700177310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include "gpu.h" #include "swapchain.h" struct priv { struct d3d11_ctx *ctx; IDXGISwapChain *swapchain; pl_tex backbuffer; }; static void d3d11_sw_destroy(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); pl_tex_destroy(sw->gpu, &p->backbuffer); SAFE_RELEASE(p->swapchain); pl_free((void *) sw); } static int d3d11_sw_latency(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; UINT max_latency; IDXGIDevice1_GetMaximumFrameLatency(ctx->dxgi_dev, &max_latency); return max_latency; } static pl_tex get_backbuffer(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; ID3D11Texture2D *backbuffer = NULL; pl_tex tex = NULL; D3D(IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D, (void **) &backbuffer)); tex = pl_d3d11_wrap(sw->gpu, pl_d3d11_wrap_params( .tex = (ID3D11Resource *) backbuffer, )); error: SAFE_RELEASE(backbuffer); return tex; } static bool d3d11_sw_resize(pl_swapchain sw, int *width, int *height) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; DXGI_SWAP_CHAIN_DESC desc = {0}; IDXGISwapChain_GetDesc(p->swapchain, &desc); int w = PL_DEF(*width, desc.BufferDesc.Width); int h = PL_DEF(*height, desc.BufferDesc.Height); if (w != desc.BufferDesc.Width || h != desc.BufferDesc.Height) { if (p->backbuffer) { PL_ERR(sw, "Tried resizing the swapchain while a frame was in " "progress! Please submit the current frame first."); return false; } D3D(IDXGISwapChain_ResizeBuffers(p->swapchain, 0, w, h, DXGI_FORMAT_UNKNOWN, desc.Flags)); } *width = w; *height = h; return true; error: return false; } static bool d3d11_sw_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; if (ctx->is_failed) return false; if (p->backbuffer) { PL_ERR(sw, "Attempted calling `pl_swapchain_start_frame` while a frame " "was already in progress! Call `pl_swapchain_submit_frame` first."); return false; } p->backbuffer = get_backbuffer(sw); if (!p->backbuffer) return false; *out_frame = (struct pl_swapchain_frame) { .fbo = p->backbuffer, .flipped = false, .color_repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, .alpha = PL_ALPHA_UNKNOWN, .bits = { .sample_depth = 8, .color_depth = 8, }, }, .color_space = pl_color_space_monitor, }; return true; } static bool d3d11_sw_submit_frame(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; // Release the backbuffer. We shouldn't hold onto it unnecessarily, because // it prevents external code from resizing the swapchain, which we'd // otherwise support just fine. 
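/* For reference, an assumed caller-side frame loop (editor's sketch, not part
 * of the original source) -- releasing the backbuffer here is what allows the
 * swapchain to be resized between iterations:
 *
 *     struct pl_swapchain_frame frame;
 *     while (running) {                        // hypothetical main loop
 *         pl_swapchain_resize(sw, &width, &height);
 *         if (!pl_swapchain_start_frame(sw, &frame))
 *             continue;                        // e.g. nothing to render to
 *         render_frame(frame.fbo);             // hypothetical renderer
 *         pl_swapchain_submit_frame(sw);
 *         pl_swapchain_swap_buffers(sw);
 *     }
 */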
pl_tex_destroy(sw->gpu, &p->backbuffer); return !ctx->is_failed; } static void d3d11_sw_swap_buffers(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; // Present can fail with a device removed error D3D(IDXGISwapChain_Present(p->swapchain, 1, 0)); error: return; } IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); IDXGISwapChain_AddRef(p->swapchain); return p->swapchain; } static struct pl_sw_fns d3d11_swapchain = { .destroy = d3d11_sw_destroy, .latency = d3d11_sw_latency, .resize = d3d11_sw_resize, .start_frame = d3d11_sw_start_frame, .submit_frame = d3d11_sw_submit_frame, .swap_buffers = d3d11_sw_swap_buffers, }; static HRESULT create_swapchain_1_2(struct d3d11_ctx *ctx, IDXGIFactory2 *factory, const struct pl_d3d11_swapchain_params *params, bool flip, UINT width, UINT height, DXGI_FORMAT format, IDXGISwapChain **swapchain_out) { IDXGISwapChain *swapchain = NULL; IDXGISwapChain1 *swapchain1 = NULL; HRESULT hr; DXGI_SWAP_CHAIN_DESC1 desc = { .Width = width, .Height = height, .Format = format, .SampleDesc.Count = 1, .BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT, .Flags = params->flags, }; if (ID3D11Device_GetFeatureLevel(ctx->dev) >= D3D_FEATURE_LEVEL_11_0) desc.BufferUsage |= DXGI_USAGE_UNORDERED_ACCESS; if (flip) { UINT max_latency; IDXGIDevice1_GetMaximumFrameLatency(ctx->dxgi_dev, &max_latency); // Make sure we have at least enough buffers to allow `max_latency` // frames in-flight at once, plus one frame for the frontbuffer desc.BufferCount = max_latency + 1; if (IsWindows10OrGreater()) { desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; } else { desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; } desc.BufferCount = PL_MIN(desc.BufferCount, DXGI_MAX_SWAP_CHAIN_BUFFERS); } else { desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; desc.BufferCount = 1; } if (params->window) { hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown *) ctx->dev, params->window, &desc, NULL, NULL, &swapchain1); } else if (params->core_window) { hr = IDXGIFactory2_CreateSwapChainForCoreWindow(factory, (IUnknown *) ctx->dev, params->core_window, &desc, NULL, &swapchain1); } else { hr = IDXGIFactory2_CreateSwapChainForComposition(factory, (IUnknown *) ctx->dev, &desc, NULL, &swapchain1); } if (FAILED(hr)) goto done; hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain, (void **) &swapchain); if (FAILED(hr)) goto done; *swapchain_out = swapchain; swapchain = NULL; done: SAFE_RELEASE(swapchain1); SAFE_RELEASE(swapchain); return hr; } static HRESULT create_swapchain_1_1(struct d3d11_ctx *ctx, IDXGIFactory1 *factory, const struct pl_d3d11_swapchain_params *params, UINT width, UINT height, DXGI_FORMAT format, IDXGISwapChain **swapchain_out) { DXGI_SWAP_CHAIN_DESC desc = { .BufferDesc = { .Width = width, .Height = height, .Format = format, }, .SampleDesc.Count = 1, .BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT, .BufferCount = 1, .OutputWindow = params->window, .Windowed = TRUE, .SwapEffect = DXGI_SWAP_EFFECT_DISCARD, .Flags = params->flags, }; return IDXGIFactory1_CreateSwapChain(factory, (IUnknown *) ctx->dev, &desc, swapchain_out); } static IDXGISwapChain *create_swapchain(struct d3d11_ctx *ctx, const struct pl_d3d11_swapchain_params *params) { IDXGIDevice1 *dxgi_dev = NULL; IDXGIAdapter1 *adapter = NULL; IDXGIFactory1 *factory = NULL; IDXGIFactory2 *factory2 = NULL; IDXGISwapChain *swapchain = NULL; bool success = false; HRESULT hr; D3D(ID3D11Device_QueryInterface(ctx->dev, 
&IID_IDXGIDevice1, (void **) &dxgi_dev)); D3D(IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter)); D3D(IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory1, (void **) &factory)); hr = IDXGIFactory1_QueryInterface(factory, &IID_IDXGIFactory2, (void **) &factory2); if (FAILED(hr)) factory2 = NULL; bool flip = factory2 && !params->blit; UINT width = PL_DEF(params->width, 1); UINT height = PL_DEF(params->height, 1); // If both width and height are unset, the default size is the window size if (params->window && params->width == 0 && params->height == 0) { RECT rc; if (GetClientRect(params->window, &rc)) { width = PL_DEF(rc.right - rc.left, 1); height = PL_DEF(rc.bottom - rc.top, 1); } } // Return here to retry creating the swapchain do { if (factory2) { // Create a DXGI 1.2+ (Windows 8+) swap chain if possible hr = create_swapchain_1_2(ctx, factory2, params, flip, width, height, DXGI_FORMAT_R8G8B8A8_UNORM, &swapchain); } else { // Fall back to DXGI 1.1 (Windows 7) hr = create_swapchain_1_1(ctx, factory, params, width, height, DXGI_FORMAT_R8G8B8A8_UNORM, &swapchain); } if (SUCCEEDED(hr)) break; if (flip) { PL_DEBUG(ctx, "Failed to create flip-model swapchain, trying bitblt"); flip = false; continue; } PL_FATAL(ctx, "Failed to create swapchain: %s", pl_hresult_to_str(hr)); goto error; } while (true); // Prevent DXGI from making changes to the window, otherwise it will hook // the Alt+Enter keystroke and make it trigger an ugly transition to // legacy exclusive fullscreen mode. IDXGIFactory_MakeWindowAssociation(factory, params->window, DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER | DXGI_MWA_NO_PRINT_SCREEN); success = true; error: if (!success) SAFE_RELEASE(swapchain); SAFE_RELEASE(factory2); SAFE_RELEASE(factory); SAFE_RELEASE(adapter); SAFE_RELEASE(dxgi_dev); return swapchain; } pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11, const struct pl_d3d11_swapchain_params *params) { struct d3d11_ctx *ctx = PL_PRIV(d3d11); pl_gpu gpu = d3d11->gpu; bool success = false; struct pl_swapchain *sw = pl_zalloc_obj(NULL, sw, struct priv); struct priv *p = PL_PRIV(sw); *sw = (struct pl_swapchain) { .impl = &d3d11_swapchain, .log = gpu->log, .gpu = gpu, }; *p = (struct priv) { .ctx = ctx, }; if (params->swapchain) { p->swapchain = params->swapchain; IDXGISwapChain_AddRef(params->swapchain); } else { p->swapchain = create_swapchain(ctx, params); if (!p->swapchain) goto error; } DXGI_SWAP_CHAIN_DESC scd = {0}; IDXGISwapChain_GetDesc(p->swapchain, &scd); if (scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL || scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_DISCARD) { PL_INFO(gpu, "Using flip-model presentation"); } else { PL_INFO(gpu, "Using bitblt-model presentation"); } success = true; error: if (!success) { PL_FATAL(gpu, "Failed to create Direct3D 11 swapchain"); d3d11_sw_destroy(sw); sw = NULL; } return sw; } libplacebo-v4.192.1/src/d3d11/utils.c000066400000000000000000000237471417677245700171220ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "utils.h" // D3D11.3 message IDs, not present in mingw-w64 v9 #define D3D11_MESSAGE_ID_CREATE_FENCE (0x300209) #define D3D11_MESSAGE_ID_DESTROY_FENCE (0x30020b) static enum pl_log_level log_level_override(unsigned int id) { switch (id) { // These warnings can happen when a pl_timer is used too often before a // blocking pl_swapchain_swap_buffers() or pl_gpu_finish(), overflowing // its internal ring buffer and causing older query objects to be reused // before their results are read. This is expected behavior, so reduce // the log level to PL_LOG_TRACE to prevent log spam. case D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS: case D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS: return PL_LOG_TRACE; // D3D11 writes log messages every time an object is created or // destroyed. That results in a lot of log spam, so force PL_LOG_TRACE. #define OBJ_LIFETIME_MESSAGES(obj) \ case D3D11_MESSAGE_ID_CREATE_ ## obj: \ case D3D11_MESSAGE_ID_DESTROY_ ## obj OBJ_LIFETIME_MESSAGES(CONTEXT): OBJ_LIFETIME_MESSAGES(BUFFER): OBJ_LIFETIME_MESSAGES(TEXTURE1D): OBJ_LIFETIME_MESSAGES(TEXTURE2D): OBJ_LIFETIME_MESSAGES(TEXTURE3D): OBJ_LIFETIME_MESSAGES(SHADERRESOURCEVIEW): OBJ_LIFETIME_MESSAGES(RENDERTARGETVIEW): OBJ_LIFETIME_MESSAGES(DEPTHSTENCILVIEW): OBJ_LIFETIME_MESSAGES(VERTEXSHADER): OBJ_LIFETIME_MESSAGES(HULLSHADER): OBJ_LIFETIME_MESSAGES(DOMAINSHADER): OBJ_LIFETIME_MESSAGES(GEOMETRYSHADER): OBJ_LIFETIME_MESSAGES(PIXELSHADER): OBJ_LIFETIME_MESSAGES(INPUTLAYOUT): OBJ_LIFETIME_MESSAGES(SAMPLER): OBJ_LIFETIME_MESSAGES(BLENDSTATE): OBJ_LIFETIME_MESSAGES(DEPTHSTENCILSTATE): OBJ_LIFETIME_MESSAGES(RASTERIZERSTATE): OBJ_LIFETIME_MESSAGES(QUERY): OBJ_LIFETIME_MESSAGES(PREDICATE): OBJ_LIFETIME_MESSAGES(COUNTER): OBJ_LIFETIME_MESSAGES(COMMANDLIST): OBJ_LIFETIME_MESSAGES(CLASSINSTANCE): OBJ_LIFETIME_MESSAGES(CLASSLINKAGE): OBJ_LIFETIME_MESSAGES(COMPUTESHADER): OBJ_LIFETIME_MESSAGES(UNORDEREDACCESSVIEW): OBJ_LIFETIME_MESSAGES(VIDEODECODER): OBJ_LIFETIME_MESSAGES(VIDEOPROCESSORENUM): OBJ_LIFETIME_MESSAGES(VIDEOPROCESSOR): OBJ_LIFETIME_MESSAGES(DECODEROUTPUTVIEW): OBJ_LIFETIME_MESSAGES(PROCESSORINPUTVIEW): OBJ_LIFETIME_MESSAGES(PROCESSOROUTPUTVIEW): OBJ_LIFETIME_MESSAGES(DEVICECONTEXTSTATE): OBJ_LIFETIME_MESSAGES(FENCE): return PL_LOG_TRACE; #undef OBJ_LIFETIME_MESSAGES // Don't force the log level of any other messages. It will be mapped // from the D3D severity code instead. 
default: return PL_LOG_NONE; } } void pl_d3d11_flush_message_queue(struct d3d11_ctx *ctx, const char *header) { if (!ctx->iqueue) return; static const enum pl_log_level severity_map[] = { [D3D11_MESSAGE_SEVERITY_CORRUPTION] = PL_LOG_FATAL, [D3D11_MESSAGE_SEVERITY_ERROR] = PL_LOG_ERR, [D3D11_MESSAGE_SEVERITY_WARNING] = PL_LOG_WARN, [D3D11_MESSAGE_SEVERITY_INFO] = PL_LOG_DEBUG, [D3D11_MESSAGE_SEVERITY_MESSAGE] = PL_LOG_DEBUG, }; enum pl_log_level header_printed = PL_LOG_NONE; uint64_t messages = ID3D11InfoQueue_GetNumStoredMessages(ctx->iqueue); if (!messages) return; uint64_t discarded = ID3D11InfoQueue_GetNumMessagesDiscardedByMessageCountLimit(ctx->iqueue); if (discarded > ctx->last_discarded) { PL_WARN(ctx, "%s:", header); header_printed = PL_LOG_WARN; // Notify number of messages skipped due to the message count limit PL_WARN(ctx, " (skipped %llu debug layer messages)", discarded - ctx->last_discarded); ctx->last_discarded = discarded; } // Copy debug layer messages to libplacebo's log output D3D11_MESSAGE *d3dmsg = NULL; for (uint64_t i = 0; i < messages; i++) { SIZE_T len; D3D(ID3D11InfoQueue_GetMessage(ctx->iqueue, i, NULL, &len)); d3dmsg = pl_zalloc(NULL, len); D3D(ID3D11InfoQueue_GetMessage(ctx->iqueue, i, d3dmsg, &len)); enum pl_log_level level = log_level_override(d3dmsg->ID); if (level == PL_LOG_NONE) level = severity_map[d3dmsg->Severity]; if (pl_msg_test(ctx->log, level)) { // If the header hasn't been printed, or it was printed for a lower // log level than the current message, print it (again) if (header_printed == PL_LOG_NONE || header_printed > level) { PL_MSG(ctx, level, "%s:", header); header_printed = level; } PL_MSG(ctx, level, " %d: %.*s", (int) d3dmsg->ID, (int) d3dmsg->DescriptionByteLength, d3dmsg->pDescription); } pl_free_ptr(&d3dmsg); } ID3D11InfoQueue_ClearStoredMessages(ctx->iqueue); error: pl_free_ptr(&d3dmsg); } HRESULT pl_d3d11_check_device_removed(struct d3d11_ctx *ctx, HRESULT hr) { // This can be called before we have a device if (!ctx->dev) return hr; switch (hr) { case DXGI_ERROR_DEVICE_HUNG: case DXGI_ERROR_DEVICE_RESET: case DXGI_ERROR_DRIVER_INTERNAL_ERROR: ctx->is_failed = true; break; case D3DDDIERR_DEVICEREMOVED: case DXGI_ERROR_DEVICE_REMOVED: hr = ID3D11Device_GetDeviceRemovedReason(ctx->dev); ctx->is_failed = true; break; } if (ctx->is_failed) PL_ERR(ctx, "Device lost!"); return hr; } HRESULT pl_d3d11_after_error(struct d3d11_ctx *ctx, HRESULT hr) { hr = pl_d3d11_check_device_removed(ctx, hr); pl_d3d11_flush_message_queue(ctx, "After error"); return hr; } struct dll_version pl_get_dll_version(const wchar_t *name) { void *data = NULL; struct dll_version ret = {0}; DWORD size = GetFileVersionInfoSizeW(name, &(DWORD) {0}); if (!size) goto error; data = pl_alloc(NULL, size); if (!GetFileVersionInfoW(name, 0, size, data)) goto error; VS_FIXEDFILEINFO *ffi; UINT ffi_len; if (!VerQueryValueW(data, L"\\", (void**)&ffi, &ffi_len)) goto error; if (ffi_len < sizeof(*ffi)) goto error; ret = (struct dll_version) { .major = HIWORD(ffi->dwFileVersionMS), .minor = LOWORD(ffi->dwFileVersionMS), .build = HIWORD(ffi->dwFileVersionLS), .revision = LOWORD(ffi->dwFileVersionLS), }; error: pl_free(data); return ret; } wchar_t *pl_from_utf8(void *ctx, const char *str) { int count = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); pl_assert(count > 0); wchar_t *ret = pl_calloc_ptr(ctx, count, ret); MultiByteToWideChar(CP_UTF8, 0, str, -1, ret, count); return ret; } char *pl_to_utf8(void *ctx, const wchar_t *str) { int count = WideCharToMultiByte(CP_UTF8, 0, str, -1, 
NULL, 0, NULL, NULL); pl_assert(count > 0); char *ret = pl_calloc_ptr(ctx, count, ret); WideCharToMultiByte(CP_UTF8, 0, str, -1, ret, count, NULL, NULL); return ret; } static const char *hresult_str(HRESULT hr) { switch (hr) { #define CASE(name) case name: return #name CASE(S_OK); CASE(S_FALSE); CASE(E_ABORT); CASE(E_ACCESSDENIED); CASE(E_FAIL); CASE(E_HANDLE); CASE(E_INVALIDARG); CASE(E_NOINTERFACE); CASE(E_NOTIMPL); CASE(E_OUTOFMEMORY); CASE(E_POINTER); CASE(E_UNEXPECTED); CASE(DXGI_ERROR_ACCESS_DENIED); CASE(DXGI_ERROR_ACCESS_LOST); CASE(DXGI_ERROR_CANNOT_PROTECT_CONTENT); CASE(DXGI_ERROR_DEVICE_HUNG); CASE(DXGI_ERROR_DEVICE_REMOVED); CASE(DXGI_ERROR_DEVICE_RESET); CASE(DXGI_ERROR_DRIVER_INTERNAL_ERROR); CASE(DXGI_ERROR_FRAME_STATISTICS_DISJOINT); CASE(DXGI_ERROR_GRAPHICS_VIDPN_SOURCE_IN_USE); CASE(DXGI_ERROR_INVALID_CALL); CASE(DXGI_ERROR_MORE_DATA); CASE(DXGI_ERROR_NAME_ALREADY_EXISTS); CASE(DXGI_ERROR_NONEXCLUSIVE); CASE(DXGI_ERROR_NOT_CURRENTLY_AVAILABLE); CASE(DXGI_ERROR_NOT_FOUND); CASE(DXGI_ERROR_REMOTE_CLIENT_DISCONNECTED); CASE(DXGI_ERROR_REMOTE_OUTOFMEMORY); CASE(DXGI_ERROR_RESTRICT_TO_OUTPUT_STALE); CASE(DXGI_ERROR_SDK_COMPONENT_MISSING); CASE(DXGI_ERROR_SESSION_DISCONNECTED); CASE(DXGI_ERROR_UNSUPPORTED); CASE(DXGI_ERROR_WAIT_TIMEOUT); CASE(DXGI_ERROR_WAS_STILL_DRAWING); #undef CASE default: return "Unknown error"; } } static char *format_error(void *ctx, DWORD error) { wchar_t *wstr; if (!FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPWSTR)&wstr, 0, NULL)) { return NULL; } // Trim any trailing newline from the message for (int i = wcslen(wstr) - 1; i >= 0; i--) { if (wstr[i] != '\r' && wstr[i] != '\n') { wstr[i + 1] = '\0'; break; } } char *str = pl_to_utf8(ctx, wstr); LocalFree(wstr); return str; } char *pl_hresult_to_str_buf(char *buf, size_t buf_size, HRESULT hr) { char *fmsg = format_error(NULL, hr); const char *code = hresult_str(hr); if (fmsg) { snprintf(buf, buf_size, "%s (%s, 0x%08lx)", fmsg, code, hr); } else { snprintf(buf, buf_size, "%s, 0x%08lx", code, hr); } pl_free(fmsg); return buf; } libplacebo-v4.192.1/src/d3d11/utils.h000066400000000000000000000071431417677245700171170ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // Flush debug messages from D3D11's info queue to libplacebo's log output. // Should be called regularly. void pl_d3d11_flush_message_queue(struct d3d11_ctx *ctx, const char *header); // Some D3D11 functions can fail with a set of HRESULT codes which indicate the // device has been removed. This is equivalent to libplacebo's gpu_is_failed // state and indicates that the pl_gpu needs to be recreated. This function // checks for one of those HRESULTs, sets the failed state, and returns a // specific HRESULT that indicates why the device was removed (eg. 
GPU hang, // driver crash, etc.) HRESULT pl_d3d11_check_device_removed(struct d3d11_ctx *ctx, HRESULT hr); // Helper function for the D3D() macro, though it can be called directly when // handling D3D11 errors if the D3D() macro isn't suitable for some reason. // Calls `pl_d3d11_check_device_removed` and `pl_d3d11_drain_debug_messages` and // returns the specific HRESULT from `pl_d3d11_check_device_removed` for logging // purposes. HRESULT pl_d3d11_after_error(struct d3d11_ctx *ctx, HRESULT hr); // Convenience macro for running DXGI/D3D11 functions and performing appropriate // actions on failure. Can also be used for any HRESULT-returning function. #define D3D(call) \ do { \ HRESULT hr_ = (call); \ if (FAILED(hr_)) { \ hr_ = pl_d3d11_after_error(ctx, hr_); \ PL_ERR(ctx, "%s: %s (%s:%d)", #call, pl_hresult_to_str(hr_), \ __FILE__, __LINE__); \ goto error; \ } \ } while (0); // Conditionally release a COM interface and set the pointer to NULL #define SAFE_RELEASE(iface) \ do { \ if (iface) \ (iface)->lpVtbl->Release(iface); \ (iface) = NULL; \ } while (0) struct dll_version { uint16_t major; uint16_t minor; uint16_t build; uint16_t revision; }; // Get the version number of a DLL. This calls GetFileVersionInfoW, which should // call LoadLibraryExW internally, so it should get the same copy of the DLL // that is loaded into memory if there is a copy in System32 and a copy in the // %PATH% or application directory. struct dll_version pl_get_dll_version(const wchar_t *name); wchar_t *pl_from_utf8(void *ctx, const char *str); char *pl_to_utf8(void *ctx, const wchar_t *str); #define pl_hresult_to_str(hr) pl_hresult_to_str_buf((char[256]){0}, 256, (hr)) char *pl_hresult_to_str_buf(char *buf, size_t buf_size, HRESULT hr); libplacebo-v4.192.1/src/dispatch.c000066400000000000000000001650721417677245700167430ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include "log.h" #include "shaders.h" #include "dispatch.h" #include "gpu.h" #include "pl_thread.h" // Maximum number of passes to keep around at once. If full, passes older than // MIN_AGE are evicted to make room. (Failing that, the cache size doubles) #define MAX_PASSES 100 #define MIN_AGE 10 enum { TMP_PRELUDE, // GLSL version, global definitions, etc. 
TMP_MAIN, // main GLSL shader body TMP_VERT_HEAD, // vertex shader inputs/outputs TMP_VERT_BODY, // vertex shader body TMP_COUNT, }; struct pl_dispatch { pl_mutex lock; pl_log log; pl_gpu gpu; uint8_t current_ident; uint8_t current_index; bool dynamic_constants; int max_passes; void (*info_callback)(void *, const struct pl_dispatch_info *); void *info_priv; PL_ARRAY(pl_shader) shaders; // to avoid re-allocations PL_ARRAY(struct pass *) passes; // compiled passes PL_ARRAY(struct cached_pass) cached_passes; // not-yet-compiled passes // temporary buffers to help avoid re_allocations during pass creation pl_str tmp[TMP_COUNT]; }; enum pass_var_type { PASS_VAR_NONE = 0, PASS_VAR_GLOBAL, // regular/global uniforms PASS_VAR_UBO, // uniform buffers PASS_VAR_PUSHC // push constants }; // Cached metadata about a variable's effective placement / update method struct pass_var { int index; // for pl_var_update enum pass_var_type type; struct pl_var_layout layout; void *cached_data; }; struct pass { uint64_t signature; // as returned by pl_shader_signature pl_pass pass; int last_index; // contains cached data and update metadata, same order as pl_shader struct pass_var *vars; int num_var_locs; // for uniform buffer updates struct pl_shader_desc ubo_desc; // temporary int ubo_index; pl_buf ubo; // Cached pl_pass_run_params. This will also contain mutable allocations // for the push constants, descriptor bindings (including the binding for // the UBO pre-filled), vertex array and variable updates struct pl_pass_run_params run_params; // for pl_dispatch_info pl_timer timer; uint64_t ts_last; uint64_t ts_peak; uint64_t ts_sum; uint64_t samples[PL_ARRAY_SIZE(((struct pl_dispatch_info *) NULL)->samples)]; int ts_idx; }; struct cached_pass { uint64_t signature; const uint8_t *cached_program; size_t cached_program_len; }; static void pass_destroy(pl_dispatch dp, struct pass *pass) { if (!pass) return; pl_buf_destroy(dp->gpu, &pass->ubo); pl_pass_destroy(dp->gpu, &pass->pass); pl_timer_destroy(dp->gpu, &pass->timer); pl_free(pass); } pl_dispatch pl_dispatch_create(pl_log log, pl_gpu gpu) { struct pl_dispatch *dp = pl_zalloc_ptr(NULL, dp); pl_mutex_init(&dp->lock); dp->log = log; dp->gpu = gpu; dp->max_passes = MAX_PASSES; return dp; } void pl_dispatch_destroy(pl_dispatch *ptr) { pl_dispatch dp = *ptr; if (!dp) return; for (int i = 0; i < dp->passes.num; i++) pass_destroy(dp, dp->passes.elem[i]); for (int i = 0; i < dp->shaders.num; i++) pl_shader_free(&dp->shaders.elem[i]); pl_mutex_destroy(&dp->lock); pl_free(dp); *ptr = NULL; } pl_shader pl_dispatch_begin_ex(pl_dispatch dp, bool unique) { pl_mutex_lock(&dp->lock); struct pl_shader_params params = { .id = unique ? 
dp->current_ident++ : 0, .gpu = dp->gpu, .index = dp->current_index, .dynamic_constants = dp->dynamic_constants, }; pl_shader sh = NULL; PL_ARRAY_POP(dp->shaders, &sh); pl_mutex_unlock(&dp->lock); if (sh) { sh->res.params = params; return sh; } return pl_shader_alloc(dp->log, ¶ms); } void pl_dispatch_mark_dynamic(pl_dispatch dp, bool dynamic) { dp->dynamic_constants = dynamic; } void pl_dispatch_callback(pl_dispatch dp, void *priv, void (*cb)(void *priv, const struct pl_dispatch_info *)) { dp->info_callback = cb; dp->info_priv = priv; } pl_shader pl_dispatch_begin(pl_dispatch dp) { return pl_dispatch_begin_ex(dp, false); } static bool add_pass_var(pl_dispatch dp, void *tmp, struct pass *pass, struct pl_pass_params *params, const struct pl_shader_var *sv, struct pass_var *pv, bool greedy) { pl_gpu gpu = dp->gpu; if (pv->type) return true; // Try not to use push constants for "large" values like matrices in the // first pass, since this is likely to exceed the VGPR/pushc size budgets bool try_pushc = greedy || (sv->var.dim_m == 1 && sv->var.dim_a == 1) || sv->dynamic; if (try_pushc && gpu->glsl.vulkan && gpu->limits.max_pushc_size) { pv->layout = pl_std430_layout(params->push_constants_size, &sv->var); size_t new_size = pv->layout.offset + pv->layout.size; if (new_size <= gpu->limits.max_pushc_size) { params->push_constants_size = new_size; pv->type = PASS_VAR_PUSHC; return true; } } // If we haven't placed all PCs yet, don't place anything else, since // we want to try and fit more stuff into PCs before "giving up" if (!greedy) return true; int num_locs = sv->var.dim_v * sv->var.dim_m * sv->var.dim_a; bool can_var = pass->num_var_locs + num_locs <= gpu->limits.max_variable_comps; // Attempt using uniform buffer next. The GLSL version 440 check is due // to explicit offsets on UBO entries. In theory we could leave away // the offsets and support UBOs for older GL as well, but this is a nice // safety net for driver bugs (and also rules out potentially buggy drivers) // Also avoid UBOs for highly dynamic stuff since that requires synchronizing // the UBO writes every frame bool try_ubo = !can_var || !sv->dynamic; if (try_ubo && gpu->glsl.version >= 440 && gpu->limits.max_ubo_size) { if (sh_buf_desc_append(tmp, gpu, &pass->ubo_desc, &pv->layout, sv->var)) { pv->type = PASS_VAR_UBO; return true; } } // Otherwise, use global uniforms if (can_var) { pv->type = PASS_VAR_GLOBAL; pv->index = params->num_variables; pv->layout = pl_var_host_layout(0, &sv->var); PL_ARRAY_APPEND_RAW(tmp, params->variables, params->num_variables, sv->var); pass->num_var_locs += num_locs; return true; } // Ran out of variable binding methods. The most likely scenario in which // this can happen is if we're using a GPU that does not support global // input vars and we've exhausted the UBO size limits. PL_ERR(dp, "Unable to add input variable '%s': possibly exhausted " "variable count / UBO size limits?", sv->var.name); return false; } #define ADD(x, ...) 
pl_str_append_asprintf_c(dp, (x), __VA_ARGS__) #define ADD_STR(x, s) pl_str_append(dp, (x), (s)) static void add_var(pl_dispatch dp, pl_str *body, const struct pl_var *var) { ADD(body, "%s %s", pl_var_glsl_type_name(*var), var->name); if (var->dim_a > 1) { ADD(body, "[%d];\n", var->dim_a); } else { ADD(body, ";\n"); } } static int cmp_buffer_var(const void *pa, const void *pb) { const struct pl_buffer_var * const *a = pa, * const *b = pb; return PL_CMP((*a)->layout.offset, (*b)->layout.offset); } static void add_buffer_vars(pl_dispatch dp, void *tmp, pl_str *body, const struct pl_buffer_var *vars, int num) { // Sort buffer vars const struct pl_buffer_var **sorted_vars = pl_calloc_ptr(tmp, num, sorted_vars); for (int i = 0; i < num; i++) sorted_vars[i] = &vars[i]; qsort(sorted_vars, num, sizeof(sorted_vars[0]), cmp_buffer_var); ADD(body, "{\n"); for (int i = 0; i < num; i++) { // Add an explicit offset wherever possible if (dp->gpu->glsl.version >= 440) ADD(body, " layout(offset=%zu) ", sorted_vars[i]->layout.offset); add_var(dp, body, &sorted_vars[i]->var); } ADD(body, "};\n"); } static ident_t sh_var_from_va(pl_shader sh, const char *name, const struct pl_vertex_attrib *va, const void *data) { return sh_var(sh, (struct pl_shader_var) { .var = pl_var_from_fmt(va->fmt, name), .data = data, }); } struct generate_params { void *tmp; pl_shader sh; struct pass *pass; struct pl_pass_params *pass_params; ident_t vert_pos; ident_t out_mat; ident_t out_off; }; static void generate_shaders(pl_dispatch dp, const struct generate_params *params) { pl_gpu gpu = dp->gpu; pl_shader sh = params->sh; void *tmp = params->tmp; const struct pl_shader_res *res = pl_shader_finalize(sh); struct pass *pass = params->pass; struct pl_pass_params *pass_params = params->pass_params; pl_str *pre = &dp->tmp[TMP_PRELUDE]; ADD(pre, "#version %d%s\n", gpu->glsl.version, (gpu->glsl.gles && gpu->glsl.version > 100) ? 
" es" : ""); if (pass_params->type == PL_PASS_COMPUTE) ADD(pre, "#extension GL_ARB_compute_shader : enable\n"); // Enable this unconditionally if the GPU supports it, since we have no way // of knowing whether subgroups are being used or not if (gpu->glsl.subgroup_size) { ADD(pre, "#extension GL_KHR_shader_subgroup_basic : enable \n" "#extension GL_KHR_shader_subgroup_vote : enable \n" "#extension GL_KHR_shader_subgroup_arithmetic : enable \n" "#extension GL_KHR_shader_subgroup_ballot : enable \n" "#extension GL_KHR_shader_subgroup_shuffle : enable \n"); } // Enable all extensions needed for different types of input bool has_ssbo = false, has_ubo = false, has_img = false, has_texel = false, has_ext = false, has_nofmt = false, has_gather = false; for (int i = 0; i < sh->descs.num; i++) { switch (sh->descs.elem[i].desc.type) { case PL_DESC_BUF_UNIFORM: has_ubo = true; break; case PL_DESC_BUF_STORAGE: has_ssbo = true; break; case PL_DESC_BUF_TEXEL_UNIFORM: has_texel = true; break; case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = res->descriptors[i].binding.object; has_nofmt |= !buf->params.format->glsl_format; has_texel = true; break; } case PL_DESC_STORAGE_IMG: { pl_tex tex = res->descriptors[i].binding.object; has_nofmt |= !tex->params.format->glsl_format; has_img = true; break; } case PL_DESC_SAMPLED_TEX: { pl_tex tex = res->descriptors[i].binding.object; has_gather |= tex->params.format->gatherable; switch (tex->sampler_type) { case PL_SAMPLER_NORMAL: break; case PL_SAMPLER_RECT: break; case PL_SAMPLER_EXTERNAL: has_ext = true; break; case PL_SAMPLER_TYPE_COUNT: pl_unreachable(); } break; } case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } } if (has_img) ADD(pre, "#extension GL_ARB_shader_image_load_store : enable\n"); if (has_ubo) ADD(pre, "#extension GL_ARB_uniform_buffer_object : enable\n"); if (has_ssbo) ADD(pre, "#extension GL_ARB_shader_storage_buffer_object : enable\n"); if (has_texel) ADD(pre, "#extension GL_ARB_texture_buffer_object : enable\n"); if (has_ext) ADD(pre, "#extension GL_OES_EGL_image_external : enable\n"); if (has_nofmt) ADD(pre, "#extension GL_EXT_shader_image_load_formatted : enable\n"); if (has_gather) ADD(pre, "#extension GL_ARB_texture_gather : enable\n"); if (gpu->glsl.gles) { // Use 32-bit precision for floats if possible ADD(pre, "#ifdef GL_FRAGMENT_PRECISION_HIGH \n" "precision highp float; \n" "#else \n" "precision mediump float; \n" "#endif \n"); // Always use 16-bit precision for samplers ADD(pre, "precision mediump sampler2D; \n"); if (gpu->limits.max_tex_1d_dim) ADD(pre, "precision mediump sampler1D; \n"); if (gpu->limits.max_tex_3d_dim && gpu->glsl.version > 100) ADD(pre, "precision mediump sampler3D; \n"); // Integer math has a good chance of caring about precision ADD(pre, "precision highp int; \n"); } // Add all of the push constants as their own element if (pass_params->push_constants_size) { // We re-use add_buffer_vars to make sure variables are sorted, this // is important because the push constants can be out-of-order in // `pass->vars` PL_ARRAY(struct pl_buffer_var) pc_bvars = {0}; for (int i = 0; i < res->num_variables; i++) { if (pass->vars[i].type != PASS_VAR_PUSHC) continue; PL_ARRAY_APPEND(tmp, pc_bvars, (struct pl_buffer_var) { .var = res->variables[i].var, .layout = pass->vars[i].layout, }); } ADD(pre, "layout(std430, push_constant) uniform PushC "); add_buffer_vars(dp, tmp, pre, pc_bvars.elem, pc_bvars.num); } // Add all of the specialization constants for (int i = 0; i < res->num_constants; i++) { static const char 
*types[PL_VAR_TYPE_COUNT] = { [PL_VAR_SINT] = "int", [PL_VAR_UINT] = "uint", [PL_VAR_FLOAT] = "float", }; const struct pl_shader_const *sc = &res->constants[i]; ADD(pre, "layout(constant_id=%"PRIu32") const %s %s = 0; \n", pass_params->constants[i].id, types[sc->type], sc->name); } // Add all of the required descriptors for (int i = 0; i < res->num_descriptors; i++) { const struct pl_shader_desc *sd = &res->descriptors[i]; const struct pl_desc *desc = &pass_params->descriptors[i]; switch (desc->type) { case PL_DESC_SAMPLED_TEX: { static const char *types[][4] = { [PL_SAMPLER_NORMAL][1] = "sampler1D", [PL_SAMPLER_NORMAL][2] = "sampler2D", [PL_SAMPLER_NORMAL][3] = "sampler3D", [PL_SAMPLER_RECT][2] = "sampler2DRect", [PL_SAMPLER_EXTERNAL][2] = "samplerExternalOES", }; pl_tex tex = sd->binding.object; int dims = pl_tex_params_dimension(tex->params); const char *type = types[tex->sampler_type][dims]; pl_assert(type); static const char prefixes[PL_FMT_TYPE_COUNT] = { [PL_FMT_FLOAT] = ' ', [PL_FMT_UNORM] = ' ', [PL_FMT_SNORM] = ' ', [PL_FMT_UINT] = 'u', [PL_FMT_SINT] = 'i', }; char prefix = prefixes[tex->params.format->type]; pl_assert(prefix); const char *prec = ""; if (prefix != ' ' && gpu->glsl.gles) prec = "highp "; // Vulkan requires explicit bindings; GL always sets the // bindings manually to avoid relying on the user doing so if (gpu->glsl.vulkan) ADD(pre, "layout(binding=%d) ", desc->binding); pl_assert(type && prefix); ADD(pre, "uniform %s%c%s %s;\n", prec, prefix, type, desc->name); break; } case PL_DESC_STORAGE_IMG: { static const char *types[] = { [1] = "image1D", [2] = "image2D", [3] = "image3D", }; // For better compatibility, we have to explicitly label the // type of data we will be reading/writing to this image. pl_tex tex = sd->binding.object; const char *format = tex->params.format->glsl_format; const char *access = pl_desc_access_glsl_name(desc->access); int dims = pl_tex_params_dimension(tex->params); if (gpu->glsl.vulkan) { if (format) { ADD(pre, "layout(binding=%d, %s) ", desc->binding, format); } else { ADD(pre, "layout(binding=%d) ", desc->binding); } } else if (gpu->glsl.version >= 130 && format) { ADD(pre, "layout(%s) ", format); } ADD(pre, "%s%s%s restrict uniform %s %s;\n", access, (sd->memory & PL_MEMORY_COHERENT) ? " coherent" : "", (sd->memory & PL_MEMORY_VOLATILE) ? " volatile" : "", types[dims], desc->name); break; } case PL_DESC_BUF_UNIFORM: if (gpu->glsl.vulkan) { ADD(pre, "layout(std140, binding=%d) ", desc->binding); } else { ADD(pre, "layout(std140) "); } ADD(pre, "uniform %s ", desc->name); add_buffer_vars(dp, tmp, pre, sd->buffer_vars, sd->num_buffer_vars); break; case PL_DESC_BUF_STORAGE: if (gpu->glsl.vulkan) { ADD(pre, "layout(std430, binding=%d) ", desc->binding); } else if (gpu->glsl.version >= 140) { ADD(pre, "layout(std430) "); } ADD(pre, "%s%s%s restrict buffer %s ", pl_desc_access_glsl_name(desc->access), (sd->memory & PL_MEMORY_COHERENT) ? " coherent" : "", (sd->memory & PL_MEMORY_VOLATILE) ? 
" volatile" : "", desc->name); add_buffer_vars(dp, tmp, pre, sd->buffer_vars, sd->num_buffer_vars); break; case PL_DESC_BUF_TEXEL_UNIFORM: if (gpu->glsl.vulkan) ADD(pre, "layout(binding=%d) ", desc->binding); ADD(pre, "uniform samplerBuffer %s;\n", desc->name); break; case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = sd->binding.object; const char *format = buf->params.format->glsl_format; const char *access = pl_desc_access_glsl_name(desc->access); if (gpu->glsl.vulkan) { if (format) { ADD(pre, "layout(binding=%d, %s) ", desc->binding, format); } else { ADD(pre, "layout(binding=%d) ", desc->binding); } } else if (format) { ADD(pre, "layout(%s) ", format); } ADD(pre, "%s%s%s restrict uniform imageBuffer %s;\n", access, (sd->memory & PL_MEMORY_COHERENT) ? " coherent" : "", (sd->memory & PL_MEMORY_VOLATILE) ? " volatile" : "", desc->name); break; } case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } } // Add all of the remaining variables for (int i = 0; i < res->num_variables; i++) { const struct pl_var *var = &res->variables[i].var; const struct pass_var *pv = &pass->vars[i]; if (pv->type != PASS_VAR_GLOBAL) continue; ADD(pre, "uniform "); add_var(dp, pre, var); } char *vert_in = gpu->glsl.version >= 130 ? "in" : "attribute"; char *vert_out = gpu->glsl.version >= 130 ? "out" : "varying"; char *frag_in = gpu->glsl.version >= 130 ? "in" : "varying"; pl_str *glsl = &dp->tmp[TMP_MAIN]; ADD_STR(glsl, *pre); const char *out_color = "gl_FragColor"; switch(pass_params->type) { case PL_PASS_RASTER: { pl_assert(params->vert_pos); pl_str *vert_head = &dp->tmp[TMP_VERT_HEAD]; pl_str *vert_body = &dp->tmp[TMP_VERT_BODY]; // Set up a trivial vertex shader ADD_STR(vert_head, *pre); ADD(vert_body, "void main() {\n"); for (int i = 0; i < sh->vas.num; i++) { const struct pl_vertex_attrib *va = &pass_params->vertex_attribs[i]; const struct pl_shader_va *sva = &sh->vas.elem[i]; const char *type = va->fmt->glsl_type; // Use the pl_shader_va for the name in the fragment shader since // the pl_vertex_attrib is already mangled for the vertex shader const char *name = sva->attr.name; char loc[32]; snprintf(loc, sizeof(loc), "layout(location=%d)", va->location); // Older GLSL doesn't support the use of explicit locations if (gpu->glsl.version < 430) loc[0] = '\0'; ADD(vert_head, "%s %s %s %s;\n", loc, vert_in, type, va->name); if (strcmp(name, params->vert_pos) == 0) { pl_assert(va->fmt->num_components == 2); ADD(vert_body, "vec2 va_pos = %s; \n", va->name); if (params->out_mat) ADD(vert_body, "va_pos = %s * va_pos; \n", params->out_mat); if (params->out_off) ADD(vert_body, "va_pos += %s; \n", params->out_off); ADD(vert_body, "gl_Position = vec4(va_pos, 0.0, 1.0); \n"); } else { // Everything else is just blindly passed through ADD(vert_head, "%s %s %s %s;\n", loc, vert_out, type, name); ADD(vert_body, "%s = %s;\n", name, va->name); ADD(glsl, "%s %s %s %s;\n", loc, frag_in, type, name); } } ADD(vert_body, "}"); ADD_STR(vert_head, *vert_body); pass_params->vertex_shader = (char *) vert_head->buf; pl_hash_merge(&pass->signature, pl_str_hash(*vert_head)); // GLSL 130+ doesn't use the magic gl_FragColor if (gpu->glsl.version >= 130) { out_color = "out_color"; ADD(glsl, "%s out vec4 %s;\n", gpu->glsl.version >= 430 ? 
"layout(location=0) " : "", out_color); } break; } case PL_PASS_COMPUTE: ADD(glsl, "layout (local_size_x = %d, local_size_y = %d) in;\n", res->compute_group_size[0], res->compute_group_size[1]); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } // Set up the main shader body ADD(glsl, "%s", res->glsl); ADD(glsl, "void main() {\n"); pl_assert(res->input == PL_SHADER_SIG_NONE); switch (pass_params->type) { case PL_PASS_RASTER: pl_assert(res->output == PL_SHADER_SIG_COLOR); ADD(glsl, "%s = %s();\n", out_color, res->name); break; case PL_PASS_COMPUTE: ADD(glsl, "%s();\n", res->name); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } ADD(glsl, "}"); pass_params->glsl_shader = (char *) glsl->buf; pl_hash_merge(&pass->signature, pl_str_hash(*glsl)); } #undef ADD #undef ADD_STR #define pass_age(pass) (dp->current_index - (pass)->last_index) static int cmp_pass_age(const void *ptra, const void *ptrb) { const struct pass *a = *(const struct pass **) ptra; const struct pass *b = *(const struct pass **) ptrb; return b->last_index - a->last_index; } static void garbage_collect_passes(pl_dispatch dp) { if (dp->passes.num <= dp->max_passes) return; // Garbage collect oldest passes, starting at the middle qsort(dp->passes.elem, dp->passes.num, sizeof(struct pass *), cmp_pass_age); int idx = dp->passes.num / 2; while (idx < dp->passes.num && pass_age(dp->passes.elem[idx]) < MIN_AGE) idx++; for (int i = idx; i < dp->passes.num; i++) pass_destroy(dp, dp->passes.elem[i]); int num_evicted = dp->passes.num - idx; dp->passes.num = idx; if (num_evicted) { PL_DEBUG(dp, "Evicted %d passes from dispatch cache, consider " "using more dynamic shaders", num_evicted); } else { dp->max_passes *= 2; } } static struct pass *finalize_pass(pl_dispatch dp, pl_shader sh, pl_tex target, ident_t vert_pos, const struct pl_blend_params *blend, bool load, const struct pl_dispatch_vertex_params *vparams, const struct pl_transform2x2 *proj) { struct pass *pass = pl_alloc_ptr(dp, pass); *pass = (struct pass) { .signature = 0x0, // updated incrementally below .last_index = dp->current_index, .ubo_desc = { .desc = { .name = "UBO", .type = PL_DESC_BUF_UNIFORM, }, }, }; // For identifiers tied to the lifetime of this shader void *tmp = SH_TMP(sh); struct pl_pass_params params = { .type = pl_shader_is_compute(sh) ? PL_PASS_COMPUTE : PL_PASS_RASTER, .num_descriptors = sh->descs.num, .vertex_type = vparams ? vparams->vertex_type : PL_PRIM_TRIANGLE_STRIP, .vertex_stride = vparams ? 
vparams->vertex_stride : 0, .blend_params = blend, }; struct generate_params gen_params = { .tmp = tmp, .pass = pass, .pass_params = ¶ms, .sh = sh, .vert_pos = vert_pos, }; if (params.type == PL_PASS_RASTER) { assert(target); params.target_format = target->params.format; params.load_target = load; // Fill in the vertex attributes array params.num_vertex_attribs = sh->vas.num; params.vertex_attribs = pl_calloc_ptr(tmp, sh->vas.num, params.vertex_attribs); int va_loc = 0; for (int i = 0; i < sh->vas.num; i++) { struct pl_vertex_attrib *va = ¶ms.vertex_attribs[i]; *va = sh->vas.elem[i].attr; // Mangle the name to make sure it doesn't conflict with the // fragment shader input va->name = pl_asprintf(tmp, "%s_v", va->name); // Place the vertex attribute va->location = va_loc; if (!vparams) { va->offset = params.vertex_stride; params.vertex_stride += va->fmt->texel_size; } // The number of vertex attribute locations consumed by a vertex // attribute is the number of vec4s it consumes, rounded up const size_t va_loc_size = sizeof(float[4]); va_loc += (va->fmt->texel_size + va_loc_size - 1) / va_loc_size; } // Hash in the raster state configuration pl_hash_merge(&pass->signature, (uint64_t) params.vertex_type); pl_hash_merge(&pass->signature, (uint64_t) params.vertex_stride); pl_hash_merge(&pass->signature, (uint64_t) params.load_target); pl_hash_merge(&pass->signature, target->params.format->signature); if (blend) { pl_static_assert(sizeof(*blend) == sizeof(enum pl_blend_mode) * 4); pl_hash_merge(&pass->signature, pl_mem_hash(blend, sizeof(*blend))); } // Load projection matrix if required if (proj && memcmp(&proj->mat, &pl_matrix2x2_identity, sizeof(proj->mat)) != 0) { gen_params.out_mat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat2("proj"), .data = PL_TRANSPOSE_2X2(proj->mat.m), }); } if (proj && (proj->c[0] || proj->c[1])) { gen_params.out_off = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("offset"), .data = proj->c, }); } } // Place all of the compile-time constants uint8_t *constant_data = NULL; if (sh->consts.num) { params.num_constants = sh->consts.num; params.constants = pl_alloc(tmp, sh->consts.num * sizeof(struct pl_constant)); // Compute offsets size_t total_size = 0; uint32_t const_id = 0; for (int i = 0; i < sh->consts.num; i++) { params.constants[i] = (struct pl_constant) { .type = sh->consts.elem[i].type, .id = const_id++, .offset = total_size, }; total_size += pl_var_type_size(sh->consts.elem[i].type); } // Write values into the constants buffer params.constant_data = constant_data = pl_alloc(pass, total_size); for (int i = 0; i < sh->consts.num; i++) { const struct pl_shader_const *sc = &sh->consts.elem[i]; void *data = constant_data + params.constants[i].offset; memcpy(data, sc->data, pl_var_type_size(sc->type)); } } // Place all the variables; these will dynamically end up in different // locations based on what the underlying GPU supports (UBOs, pushc, etc.) // // We go through the list twice, once to place stuff that we definitely // want inside PCs, and then a second time to opportunistically place the rest. 
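/* Editor's note (assumed example, not part of the original source): with a
 * hypothetical limits.max_pushc_size of 128 bytes on a Vulkan-GLSL GPU, a
 * small dynamic vec2 is claimed as a push constant by the first, non-greedy
 * loop below, while a static mat4 (64 bytes in std430) is deferred; the
 * second, greedy loop then retries it as a push constant and, if it no longer
 * fits, falls back to the UBO (GLSL >= 440) or a plain global uniform, as
 * implemented in add_pass_var() above.
 */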
pass->vars = pl_calloc_ptr(pass, sh->vars.num, pass->vars); for (int i = 0; i < sh->vars.num; i++) { if (!add_pass_var(dp, tmp, pass, ¶ms, &sh->vars.elem[i], &pass->vars[i], false)) goto error; } for (int i = 0; i < sh->vars.num; i++) { if (!add_pass_var(dp, tmp, pass, ¶ms, &sh->vars.elem[i], &pass->vars[i], true)) goto error; } // Now that we know the variable placement, finalize pushc/UBO sizes params.push_constants_size = PL_ALIGN2(params.push_constants_size, 4); size_t ubo_size = sh_buf_desc_size(&pass->ubo_desc); if (ubo_size) { pass->ubo_index = sh->descs.num; sh_desc(sh, pass->ubo_desc); }; // Place and fill in the descriptors const int num_descs = sh->descs.num; int binding[PL_DESC_TYPE_COUNT] = {0}; params.num_descriptors = num_descs; params.descriptors = pl_calloc_ptr(tmp, num_descs, params.descriptors); for (int i = 0; i < num_descs; i++) { struct pl_desc *desc = ¶ms.descriptors[i]; *desc = sh->descs.elem[i].desc; desc->binding = binding[pl_desc_namespace(dp->gpu, desc->type)]++; } // Finalize the shader and look it up in the pass cache generate_shaders(dp, &gen_params); for (int i = 0; i < dp->passes.num; i++) { struct pass *p = dp->passes.elem[i]; if (p->signature != pass->signature) continue; // Found existing shader, re-use directly if (p->ubo) sh->descs.elem[p->ubo_index].binding.object = p->ubo; pl_free(p->run_params.constant_data); p->run_params.constant_data = pl_steal(p, constant_data); p->last_index = dp->current_index; pl_free(pass); return p; } // Find and attach the cached program, if any for (int i = 0; i < dp->cached_passes.num; i++) { if (dp->cached_passes.elem[i].signature == pass->signature) { PL_DEBUG(dp, "Re-using cached program with signature 0x%llx", (unsigned long long) pass->signature); params.cached_program = dp->cached_passes.elem[i].cached_program; params.cached_program_len = dp->cached_passes.elem[i].cached_program_len; PL_ARRAY_REMOVE_AT(dp->cached_passes, i); break; } } pass->pass = pl_pass_create(dp->gpu, ¶ms); if (!pass->pass) { PL_ERR(dp, "Failed creating render pass for dispatch"); // Add it anyway } struct pl_pass_run_params *rparams = &pass->run_params; rparams->pass = pass->pass; rparams->constant_data = constant_data; rparams->push_constants = pl_zalloc(pass, params.push_constants_size); rparams->desc_bindings = pl_calloc_ptr(pass, params.num_descriptors, rparams->desc_bindings); if (ubo_size && pass->pass) { // Create the UBO pass->ubo = pl_buf_create(dp->gpu, pl_buf_params( .size = ubo_size, .uniform = true, .host_writable = true, )); if (!pass->ubo) { PL_ERR(dp, "Failed creating uniform buffer for dispatch"); goto error; } sh->descs.elem[pass->ubo_index].binding.object = pass->ubo; } if (params.type == PL_PASS_RASTER && !vparams) { // Generate the vertex array placeholder rparams->vertex_count = 4; // single quad size_t vert_size = rparams->vertex_count * params.vertex_stride; rparams->vertex_data = pl_zalloc(pass, vert_size); } pass->timer = pl_timer_create(dp->gpu); PL_ARRAY_APPEND(dp, dp->passes, pass); return pass; error: pass_destroy(dp, pass); return NULL; } static void update_pass_var(pl_dispatch dp, struct pass *pass, const struct pl_shader_var *sv, struct pass_var *pv) { struct pl_var_layout host_layout = pl_var_host_layout(0, &sv->var); pl_assert(host_layout.size); // Use the cache to skip updates if possible if (pv->cached_data && !memcmp(sv->data, pv->cached_data, host_layout.size)) return; if (!pv->cached_data) pv->cached_data = pl_alloc(pass, host_layout.size); memcpy(pv->cached_data, sv->data, host_layout.size); struct 
pl_pass_run_params *rparams = &pass->run_params; switch (pv->type) { case PASS_VAR_NONE: pl_unreachable(); case PASS_VAR_GLOBAL: { struct pl_var_update vu = { .index = pv->index, .data = sv->data, }; PL_ARRAY_APPEND_RAW(pass, rparams->var_updates, rparams->num_var_updates, vu); break; } case PASS_VAR_UBO: { pl_assert(pass->ubo); const size_t offset = pv->layout.offset; if (host_layout.stride == pv->layout.stride) { pl_assert(host_layout.size == pv->layout.size); pl_buf_write(dp->gpu, pass->ubo, offset, sv->data, host_layout.size); } else { // Coalesce strided UBO write into a single pl_buf_write to avoid // unnecessary synchronization overhead by assembling the correctly // strided upload in RAM pl_grow(dp, &dp->tmp[0].buf, pv->layout.size); uint8_t * const tmp = dp->tmp[0].buf; const uint8_t *src = sv->data; const uint8_t *end = src + host_layout.size; uint8_t *dst = tmp; while (src < end) { memcpy(dst, src, host_layout.stride); src += host_layout.stride; dst += pv->layout.stride; } pl_buf_write(dp->gpu, pass->ubo, offset, tmp, pv->layout.size); } break; } case PASS_VAR_PUSHC: pl_assert(rparams->push_constants); memcpy_layout(rparams->push_constants, pv->layout, sv->data, host_layout); break; }; } static void compute_vertex_attribs(pl_dispatch dp, pl_shader sh, int width, int height, ident_t *out_scale) { // Simulate vertex attributes using global definitions *out_scale = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("out_scale"), .data = &(float[2]){ 1.0 / width, 1.0 / height }, .dynamic = true, }); GLSLP("#define frag_pos(id) (vec2(id) + vec2(0.5)) \n" "#define frag_map(id) (%s * frag_pos(id)) \n" "#define gl_FragCoord vec4(frag_pos(gl_GlobalInvocationID), 0.0, 1.0) \n", *out_scale); for (int n = 0; n < sh->vas.num; n++) { const struct pl_shader_va *sva = &sh->vas.elem[n]; ident_t points[4]; for (int i = 0; i < PL_ARRAY_SIZE(points); i++) { char name[4]; snprintf(name, sizeof(name), "p%d", i); points[i] = sh_var_from_va(sh, name, &sva->attr, sva->data[i]); } GLSLP("#define %s_map(id) " "(mix(mix(%s, %s, frag_map(id).x), " " mix(%s, %s, frag_map(id).x), " "frag_map(id).y))\n" "#define %s (%s_map(gl_GlobalInvocationID))\n", sva->attr.name, points[0], points[1], points[2], points[3], sva->attr.name, sva->attr.name); } } static void translate_compute_shader(pl_dispatch dp, pl_shader sh, const struct pl_rect2d *rc, const struct pl_dispatch_params *params) { int width = abs(pl_rect_w(*rc)), height = abs(pl_rect_h(*rc)); if (sh->transpose) PL_SWAP(width, height); ident_t out_scale; compute_vertex_attribs(dp, sh, width, height, &out_scale); // Simulate a framebuffer using storage images pl_assert(params->target->params.storable); pl_assert(sh->res.output == PL_SHADER_SIG_COLOR); ident_t fbo = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->target, .desc = { .name = "out_image", .type = PL_DESC_STORAGE_IMG, .access = params->blend_params ? PL_DESC_ACCESS_READWRITE : PL_DESC_ACCESS_WRITEONLY, }, }); ident_t base = sh_var(sh, (struct pl_shader_var) { .data = &(int[2]){ rc->x0, rc->y0 }, .dynamic = true, .var = { .name = "base", .type = PL_VAR_SINT, .dim_v = 2, .dim_m = 1, .dim_a = 1, }, }); int dx = rc->x0 > rc->x1 ? -1 : 1, dy = rc->y0 > rc->y1 ? -1 : 1; const char *swiz = sh->transpose ? 
"yx" : "xy"; GLSL("ivec2 dir = ivec2(%d, %d);\n", dx, dy); // hard-code, not worth var GLSL("ivec2 pos = %s + dir * ivec2(gl_GlobalInvocationID).%s;\n", base, swiz); GLSL("vec2 fpos = %s * vec2(gl_GlobalInvocationID);\n", out_scale); GLSL("if (fpos.x < 1.0 && fpos.y < 1.0) {\n"); if (params->blend_params) { GLSL("vec4 orig = imageLoad(%s, pos);\n", fbo); static const char *modes[] = { [PL_BLEND_ZERO] = "0.0", [PL_BLEND_ONE] = "1.0", [PL_BLEND_SRC_ALPHA] = "color.a", [PL_BLEND_ONE_MINUS_SRC_ALPHA] = "(1.0 - color.a)", }; GLSL("color = vec4(color.rgb * vec3(%s), color.a * %s) \n" " + vec4(orig.rgb * vec3(%s), orig.a * %s);\n", modes[params->blend_params->src_rgb], modes[params->blend_params->src_alpha], modes[params->blend_params->dst_rgb], modes[params->blend_params->dst_alpha]); } GLSL("imageStore(%s, pos, color);\n", fbo); GLSL("}\n"); sh->res.output = PL_SHADER_SIG_NONE; } static void run_pass(pl_dispatch dp, pl_shader sh, struct pass *pass) { const struct pl_shader_res *res = pl_shader_finalize(sh); pl_pass_run(dp->gpu, &pass->run_params); for (uint64_t ts; (ts = pl_timer_query(dp->gpu, pass->timer));) { PL_TRACE(dp, "Spent %.3f ms on shader: %s", ts / 1e6, res->description); uint64_t old = pass->samples[pass->ts_idx]; pass->samples[pass->ts_idx] = ts; pass->ts_last = ts; pass->ts_peak = PL_MAX(pass->ts_peak, ts); pass->ts_sum += ts; pass->ts_idx = (pass->ts_idx + 1) % PL_ARRAY_SIZE(pass->samples); if (old) { pass->ts_sum -= old; if (old == pass->ts_peak) { uint64_t new_peak = 0; for (int i = 0; i < PL_ARRAY_SIZE(pass->samples); i++) new_peak = PL_MAX(new_peak, pass->samples[i]); pass->ts_peak = new_peak; } } } if (!dp->info_callback) return; struct pl_dispatch_info info; info.signature = pass->signature; info.shader = res; // Test to see if the ring buffer already wrapped around once if (pass->samples[pass->ts_idx]) { info.num_samples = PL_ARRAY_SIZE(pass->samples); int num_wrapped = info.num_samples - pass->ts_idx; memcpy(info.samples, &pass->samples[pass->ts_idx], num_wrapped * sizeof(info.samples[0])); memcpy(&info.samples[num_wrapped], pass->samples, pass->ts_idx * sizeof(info.samples[0])); } else { info.num_samples = pass->ts_idx; memcpy(info.samples, pass->samples, pass->ts_idx * sizeof(info.samples[0])); } info.last = pass->ts_last; info.peak = pass->ts_peak; info.average = pass->ts_sum / PL_MAX(info.num_samples, 1); dp->info_callback(dp->info_priv, &info); } bool pl_dispatch_finish(pl_dispatch dp, const struct pl_dispatch_params *params) { pl_shader sh = *params->shader; const struct pl_shader_res *res = &sh->res; bool ret = false; pl_mutex_lock(&dp->lock); if (sh->failed) { PL_ERR(sh, "Trying to dispatch a failed shader."); goto error; } if (!sh->mutable) { PL_ERR(dp, "Trying to dispatch non-mutable shader?"); goto error; } if (res->input != PL_SHADER_SIG_NONE || res->output != PL_SHADER_SIG_COLOR) { PL_ERR(dp, "Trying to dispatch shader with incompatible signature!"); goto error; } const struct pl_tex_params *tpars = ¶ms->target->params; if (pl_tex_params_dimension(*tpars) != 2 || !tpars->renderable) { PL_ERR(dp, "Trying to dispatch a shader using an invalid target " "texture. 
The target must be a renderable 2D texture."); goto error; } const struct pl_gpu_limits *limits = &dp->gpu->limits; bool can_compute = tpars->storable; if (can_compute && params->blend_params) can_compute = tpars->format->caps & PL_FMT_CAP_READWRITE; if (pl_shader_is_compute(sh) && !can_compute) { PL_ERR(dp, "Trying to dispatch using a compute shader with a " "non-storable or incompatible target texture."); goto error; } else if (can_compute && limits->compute_queues > limits->fragment_queues) { if (sh_try_compute(sh, 16, 16, true, 0)) PL_TRACE(dp, "Upgrading fragment shader to compute shader."); } struct pl_rect2d rc = params->rect; if (!pl_rect_w(rc)) { rc.x0 = 0; rc.x1 = tpars->w; } if (!pl_rect_h(rc)) { rc.y0 = 0; rc.y1 = tpars->h; } int w, h, tw = abs(pl_rect_w(rc)), th = abs(pl_rect_h(rc)); if (pl_shader_output_size(sh, &w, &h) && (w != tw || h != th)) { PL_ERR(dp, "Trying to dispatch a shader with explicit output size " "requirements %dx%d%s using a target rect of size %dx%d.", w, h, sh->transpose ? " (transposed)" : "", tw, th); goto error; } ident_t vert_pos = NULL; const struct pl_transform2x2 *proj = NULL; if (pl_shader_is_compute(sh)) { // Translate the compute shader to simulate vertices etc. translate_compute_shader(dp, sh, &rc, params); } else { // Add the vertex information encoding the position struct pl_rect2df vert_rect = { .x0 = 2.0 * rc.x0 / tpars->w - 1.0, .y0 = 2.0 * rc.y0 / tpars->h - 1.0, .x1 = 2.0 * rc.x1 / tpars->w - 1.0, .y1 = 2.0 * rc.y1 / tpars->h - 1.0, }; if (sh->transpose) { static const struct pl_transform2x2 transpose_proj = {{{ { 0, 1 }, { 1, 0 }, }}}; proj = &transpose_proj; PL_SWAP(vert_rect.x0, vert_rect.y0); PL_SWAP(vert_rect.x1, vert_rect.y1); } vert_pos = sh_attr_vec2(sh, "position", &vert_rect); } // We need to set pl_pass_params.load_target when either blending is // enabled or we're drawing to some scissored sub-rect of the texture struct pl_rect2d full = { 0, 0, tpars->w, tpars->h }; struct pl_rect2d rc_norm = rc; pl_rect2d_normalize(&rc_norm); rc_norm.x0 = PL_MAX(rc_norm.x0, 0); rc_norm.y0 = PL_MAX(rc_norm.y0, 0); rc_norm.x1 = PL_MIN(rc_norm.x1, tpars->w); rc_norm.y1 = PL_MIN(rc_norm.y1, tpars->h); bool load = params->blend_params || !pl_rect2d_eq(rc_norm, full); struct pass *pass = finalize_pass(dp, sh, params->target, vert_pos, params->blend_params, load, NULL, proj); // Silently return on failed passes if (!pass || !pass->pass) goto error; struct pl_pass_run_params *rparams = &pass->run_params; // Update the descriptor bindings for (int i = 0; i < sh->descs.num; i++) rparams->desc_bindings[i] = sh->descs.elem[i].binding; // Update all of the variables (if needed) rparams->num_var_updates = 0; for (int i = 0; i < sh->vars.num; i++) update_pass_var(dp, pass, &sh->vars.elem[i], &pass->vars[i]); // Update the vertex data if (rparams->vertex_data) { uintptr_t vert_base = (uintptr_t) rparams->vertex_data; size_t stride = rparams->pass->params.vertex_stride; for (int i = 0; i < sh->vas.num; i++) { const struct pl_shader_va *sva = &sh->vas.elem[i]; struct pl_vertex_attrib *va = &rparams->pass->params.vertex_attribs[i]; size_t size = sva->attr.fmt->texel_size; uintptr_t va_base = vert_base + va->offset; // use placed offset for (int n = 0; n < 4; n++) memcpy((void *) (va_base + n * stride), sva->data[n], size); } } // For compute shaders: also update the dispatch dimensions if (pl_shader_is_compute(sh)) { int width = abs(pl_rect_w(rc)), height = abs(pl_rect_h(rc)); if (sh->transpose) PL_SWAP(width, height); // Round up to make sure we don't leave 
off a part of the target int block_w = res->compute_group_size[0], block_h = res->compute_group_size[1], num_x = (width + block_w - 1) / block_w, num_y = (height + block_h - 1) / block_h; rparams->compute_groups[0] = num_x; rparams->compute_groups[1] = num_y; rparams->compute_groups[2] = 1; } else { // Update the scissors for performance rparams->scissors = rc_norm; } // Dispatch the actual shader rparams->target = params->target; rparams->timer = PL_DEF(params->timer, pass->timer); run_pass(dp, sh, pass); ret = true; // fall through error: // Reset the temporary buffers which we use to build the shader for (int i = 0; i < PL_ARRAY_SIZE(dp->tmp); i++) dp->tmp[i].len = 0; pl_mutex_unlock(&dp->lock); pl_dispatch_abort(dp, params->shader); return ret; } bool pl_dispatch_compute(pl_dispatch dp, const struct pl_dispatch_compute_params *params) { pl_shader sh = *params->shader; const struct pl_shader_res *res = &sh->res; bool ret = false; pl_mutex_lock(&dp->lock); if (sh->failed) { PL_ERR(sh, "Trying to dispatch a failed shader."); goto error; } if (!sh->mutable) { PL_ERR(dp, "Trying to dispatch non-mutable shader?"); goto error; } if (res->input != PL_SHADER_SIG_NONE) { PL_ERR(dp, "Trying to dispatch shader with incompatible signature!"); goto error; } if (!pl_shader_is_compute(sh)) { PL_ERR(dp, "Trying to dispatch a non-compute shader using " "`pl_dispatch_compute`!"); goto error; } if (sh->vas.num) { if (!params->width || !params->height) { PL_ERR(dp, "Trying to dispatch a targetless compute shader that " "uses vertex attributes, this requires specifying the size " "of the effective rendering area!"); goto error; } compute_vertex_attribs(dp, sh, params->width, params->height, &(ident_t){0}); } struct pass *pass = finalize_pass(dp, sh, NULL, NULL, NULL, false, NULL, NULL); // Silently return on failed passes if (!pass || !pass->pass) goto error; struct pl_pass_run_params *rparams = &pass->run_params; // Update the descriptor bindings for (int i = 0; i < sh->descs.num; i++) rparams->desc_bindings[i] = sh->descs.elem[i].binding; // Update all of the variables (if needed) rparams->num_var_updates = 0; for (int i = 0; i < sh->vars.num; i++) update_pass_var(dp, pass, &sh->vars.elem[i], &pass->vars[i]); // Update the dispatch size int groups = 1; for (int i = 0; i < 3; i++) { groups *= params->dispatch_size[i]; rparams->compute_groups[i] = params->dispatch_size[i]; } if (!groups) { pl_assert(params->width && params->height); int block_w = res->compute_group_size[0], block_h = res->compute_group_size[1], num_x = (params->width + block_w - 1) / block_w, num_y = (params->height + block_h - 1) / block_h; rparams->compute_groups[0] = num_x; rparams->compute_groups[1] = num_y; rparams->compute_groups[2] = 1; } // Dispatch the actual shader rparams->timer = PL_DEF(params->timer, pass->timer); run_pass(dp, sh, pass); ret = true; // fall through error: // Reset the temporary buffers which we use to build the shader for (int i = 0; i < PL_ARRAY_SIZE(dp->tmp); i++) dp->tmp[i].len = 0; pl_mutex_unlock(&dp->lock); pl_dispatch_abort(dp, params->shader); return ret; } bool pl_dispatch_vertex(pl_dispatch dp, const struct pl_dispatch_vertex_params *params) { pl_shader sh = *params->shader; const struct pl_shader_res *res = &sh->res; bool ret = false; pl_mutex_lock(&dp->lock); if (sh->failed) { PL_ERR(sh, "Trying to dispatch a failed shader."); goto error; } if (!sh->mutable) { PL_ERR(dp, "Trying to dispatch non-mutable shader?"); goto error; } if (res->input != PL_SHADER_SIG_NONE || res->output != 
PL_SHADER_SIG_COLOR) { PL_ERR(dp, "Trying to dispatch shader with incompatible signature!"); goto error; } const struct pl_tex_params *tpars = &params->target->params; if (pl_tex_params_dimension(*tpars) != 2 || !tpars->renderable) { PL_ERR(dp, "Trying to dispatch a shader using an invalid target " "texture. The target must be a renderable 2D texture."); goto error; } if (pl_shader_is_compute(sh)) { PL_ERR(dp, "Trying to dispatch a compute shader using pl_dispatch_vertex."); goto error; } if (sh->vas.num) { PL_ERR(dp, "Trying to dispatch a custom vertex shader with already " "attached vertex attributes."); goto error; } if (sh->transpose) { PL_ERR(dp, "Trying to dispatch a transposed shader using " "pl_dispatch_vertex, unlikely to be correct. Erroring as a " "safety precaution!"); goto error; } int pos_idx = params->vertex_position_idx; if (pos_idx < 0 || pos_idx >= params->num_vertex_attribs) { PL_ERR(dp, "Vertex position index out of range?"); goto error; } // Attach all of the vertex attributes to the shader manually sh->vas.num = params->num_vertex_attribs; PL_ARRAY_RESIZE(sh, sh->vas, sh->vas.num); for (int i = 0; i < params->num_vertex_attribs; i++) sh->vas.elem[i].attr = params->vertex_attribs[i]; // Compute the coordinate projection matrix struct pl_transform2x2 proj = pl_transform2x2_identity; switch (params->vertex_coords) { case PL_COORDS_ABSOLUTE: proj.mat.m[0][0] /= tpars->w; proj.mat.m[1][1] /= tpars->h; // fall through case PL_COORDS_RELATIVE: proj.mat.m[0][0] *= 2.0; proj.mat.m[1][1] *= 2.0; proj.c[0] -= 1.0; proj.c[1] -= 1.0; // fall through case PL_COORDS_NORMALIZED: if (params->vertex_flipped) { proj.mat.m[1][1] = -proj.mat.m[1][1]; proj.c[1] += 2.0; } break; } ident_t vert_pos = params->vertex_attribs[pos_idx].name; struct pass *pass = finalize_pass(dp, sh, params->target, vert_pos, params->blend_params, true, params, &proj); // Silently return on failed passes if (!pass || !pass->pass) goto error; struct pl_pass_run_params *rparams = &pass->run_params; // Update the descriptor bindings for (int i = 0; i < sh->descs.num; i++) rparams->desc_bindings[i] = sh->descs.elem[i].binding; // Update all of the variables (if needed) rparams->num_var_updates = 0; for (int i = 0; i < sh->vars.num; i++) update_pass_var(dp, pass, &sh->vars.elem[i], &pass->vars[i]); // Update the scissors rparams->scissors = params->scissors; if (params->vertex_flipped) { rparams->scissors.y0 = tpars->h - rparams->scissors.y0; rparams->scissors.y1 = tpars->h - rparams->scissors.y1; } pl_rect2d_normalize(&rparams->scissors); // Dispatch the actual shader rparams->target = params->target; rparams->vertex_count = params->vertex_count; rparams->vertex_data = params->vertex_data; rparams->vertex_buf = params->vertex_buf; rparams->buf_offset = params->buf_offset; rparams->index_data = params->index_data; rparams->index_fmt = params->index_fmt; rparams->index_buf = params->index_buf; rparams->index_offset = params->index_offset; rparams->timer = PL_DEF(params->timer, pass->timer); run_pass(dp, sh, pass); ret = true; // fall through error: // Reset the temporary buffers which we use to build the shader for (int i = 0; i < PL_ARRAY_SIZE(dp->tmp); i++) dp->tmp[i].len = 0; pl_mutex_unlock(&dp->lock); pl_dispatch_abort(dp, params->shader); return ret; } void pl_dispatch_abort(pl_dispatch dp, pl_shader *psh) { pl_shader sh = *psh; if (!sh) return; // Reset this as early as possible to free temporary resources pl_shader_reset(sh, NULL); // Re-add the shader to the internal pool of shaders pl_mutex_lock(&dp->lock);
PL_ARRAY_APPEND(dp, dp->shaders, sh); pl_mutex_unlock(&dp->lock); *psh = NULL; } void pl_dispatch_reset_frame(pl_dispatch dp) { pl_mutex_lock(&dp->lock); dp->current_ident = 0; dp->current_index++; garbage_collect_passes(dp); pl_mutex_unlock(&dp->lock); } // Stuff related to caching static const char cache_magic[] = {'P', 'L', 'D', 'P'}; static const uint32_t cache_version = 1; static void write_buf(uint8_t *buf, size_t *pos, const void *src, size_t size) { assert(size); if (buf) memcpy(&buf[*pos], src, size); *pos += size; } #define WRITE(type, var) write_buf(out, &size, &(type){ var }, sizeof(type)) #define LOAD(var) \ do { \ memcpy(&(var), cache, sizeof(var)); \ cache += sizeof(var); \ } while (0) size_t pl_dispatch_save(pl_dispatch dp, uint8_t *out) { size_t size = 0; pl_mutex_lock(&dp->lock); write_buf(out, &size, cache_magic, sizeof(cache_magic)); WRITE(uint32_t, cache_version); // Remember this position so we can go back and write the actual number of // cached programs uint32_t num_passes = 0; void *out_num = out ? &out[size] : NULL; size += sizeof(num_passes); // Save the cached programs for all compiled passes for (int i = 0; i < dp->passes.num; i++) { const struct pass *pass = dp->passes.elem[i]; if (!pass->pass) continue; const struct pl_pass_params *params = &pass->pass->params; if (!params->cached_program_len) continue; if (out) { PL_DEBUG(dp, "Saving %zu bytes of cached program with signature 0x%llx", params->cached_program_len, (unsigned long long) pass->signature); } num_passes++; WRITE(uint64_t, pass->signature); WRITE(uint64_t, params->cached_program_len); write_buf(out, &size, params->cached_program, params->cached_program_len); } // Re-save the cached programs for all previously loaded (but not yet // compiled) passes. This is simply to make `pl_dispatch_load` followed // by `pl_dispatch_save` return the same cache as was previously loaded. 
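// Illustrative usage sketch of this save/load API (assuming a malloc-style
// allocation on the caller's side). Passing NULL as `out` only accumulates
// the required size, as write_buf() above shows:
//
//     size_t len = pl_dispatch_save(dp, NULL);  // query required size
//     uint8_t *buf = malloc(len);
//     pl_dispatch_save(dp, buf);                // serialize the cache
//     // ... persist buf; on a later run:
//     pl_dispatch_load(dp, buf);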
for (int i = 0; i < dp->cached_passes.num; i++) { const struct cached_pass *pass = &dp->cached_passes.elem[i]; if (!pass->cached_program_len) continue; if (out) { PL_DEBUG(dp, "Saving %zu bytes of cached program with signature 0x%llx", pass->cached_program_len, (unsigned long long) pass->signature); } num_passes++; WRITE(uint64_t, pass->signature); WRITE(uint64_t, pass->cached_program_len); write_buf(out, &size, pass->cached_program, pass->cached_program_len); } if (out) memcpy(out_num, &num_passes, sizeof(num_passes)); pl_mutex_unlock(&dp->lock); return size; } void pl_dispatch_load(pl_dispatch dp, const uint8_t *cache) { char magic[4]; LOAD(magic); if (memcmp(magic, cache_magic, sizeof(magic)) != 0) { PL_ERR(dp, "Failed loading dispatch cache: invalid magic bytes"); return; } uint32_t version; LOAD(version); if (version != cache_version) { PL_WARN(dp, "Failed loading dispatch cache: wrong version"); return; } uint32_t num; LOAD(num); pl_mutex_lock(&dp->lock); for (int i = 0; i < num; i++) { uint64_t sig, size; LOAD(sig); LOAD(size); if (!size) continue; // Skip passes that are already compiled for (int n = 0; n < dp->passes.num; n++) { if (dp->passes.elem[n]->signature == sig) { PL_DEBUG(dp, "Skipping already compiled pass with signature %llx", (unsigned long long) sig); cache += size; continue; } } // Find a cached_pass entry with this signature, if any struct cached_pass *pass = NULL; for (int n = 0; n < dp->cached_passes.num; n++) { if (dp->cached_passes.elem[n].signature == sig) { pass = &dp->cached_passes.elem[n]; break; } } if (!pass) { // None found, add a new entry PL_ARRAY_GROW(dp, dp->cached_passes); pass = &dp->cached_passes.elem[dp->cached_passes.num++]; *pass = (struct cached_pass) { .signature = sig }; } PL_DEBUG(dp, "Loading %zu bytes of cached program with signature 0x%llx", (size_t) size, (unsigned long long) sig); pl_free((void *) pass->cached_program); pass->cached_program = pl_memdup(dp, cache, size); pass->cached_program_len = size; cache += size; } pl_mutex_unlock(&dp->lock); } libplacebo-v4.192.1/src/dispatch.h000066400000000000000000000024201417677245700167330ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // Like `pl_dispatch_begin`, but has an extra `unique` parameter. If this is // true, the generated shader will be uniquely namespaced `unique` and may be // freely merged with other shaders (`sh_subpass`). Otherwise, all shaders have // the same namespace and merging them is an error. pl_shader pl_dispatch_begin_ex(pl_dispatch dp, bool unique); // Set the `dynamic_constants` field for newly created `pl_shader` objects. // // This is a private API because it's sort of clunky/stateful. 
void pl_dispatch_mark_dynamic(pl_dispatch dp, bool dynamic); libplacebo-v4.192.1/src/dither.c000066400000000000000000000125311417677245700164120ustar00rootroot00000000000000/* * Generate a noise texture for dithering images. * Copyright © 2013 Wessel Dankers * * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . * * The original code is taken from mpv, under the same license. */ #include #include #include #include #include #include #include #include "common.h" void pl_generate_bayer_matrix(float *data, int size) { pl_assert(size >= 0); // Start with a single entry of 0 data[0] = 0; for (int sz = 1; sz < size; sz *= 2) { // Make three copies of the current, appropriately shifted and scaled for (int y = 0; y < sz; y ++) { for (int x = 0; x < sz; x++) { int offsets[] = {0, sz * size + sz, sz, sz * size}; int pos = y * size + x; for (int i = 1; i < 4; i++) data[pos + offsets[i]] = data[pos] + i / (4.0 * sz * sz); } } } } #define MAX_SIZEB 8 #define MAX_SIZE (1 << MAX_SIZEB) #define MAX_SIZE2 (MAX_SIZE * MAX_SIZE) typedef uint_fast32_t index_t; #define WRAP_SIZE2(k, x) ((index_t)((index_t)(x) & ((k)->size2 - 1))) #define XY(k, x, y) ((index_t)(((x) | ((y) << (k)->sizeb)))) struct ctx { unsigned int sizeb, size, size2; unsigned int gauss_radius; unsigned int gauss_middle; uint64_t gauss[MAX_SIZE2]; index_t randomat[MAX_SIZE2]; bool calcmat[MAX_SIZE2]; uint64_t gaussmat[MAX_SIZE2]; index_t unimat[MAX_SIZE2]; }; static void makegauss(struct ctx *k, unsigned int sizeb) { pl_assert(sizeb >= 1 && sizeb <= MAX_SIZEB); k->sizeb = sizeb; k->size = 1 << k->sizeb; k->size2 = k->size * k->size; k->gauss_radius = k->size / 2 - 1; k->gauss_middle = XY(k, k->gauss_radius, k->gauss_radius); unsigned int gauss_size = k->gauss_radius * 2 + 1; unsigned int gauss_size2 = gauss_size * gauss_size; for (index_t c = 0; c < k->size2; c++) k->gauss[c] = 0; double sigma = -log(1.5 / (double) UINT64_MAX * gauss_size2) / k->gauss_radius; for (index_t gy = 0; gy <= k->gauss_radius; gy++) { for (index_t gx = 0; gx <= gy; gx++) { int cx = (int)gx - k->gauss_radius; int cy = (int)gy - k->gauss_radius; int sq = cx * cx + cy * cy; double e = exp(-sqrt(sq) * sigma); uint64_t v = e / gauss_size2 * (double) UINT64_MAX; k->gauss[XY(k, gx, gy)] = k->gauss[XY(k, gy, gx)] = k->gauss[XY(k, gx, gauss_size - 1 - gy)] = k->gauss[XY(k, gy, gauss_size - 1 - gx)] = k->gauss[XY(k, gauss_size - 1 - gx, gy)] = k->gauss[XY(k, gauss_size - 1 - gy, gx)] = k->gauss[XY(k, gauss_size - 1 - gx, gauss_size - 1 - gy)] = k->gauss[XY(k, gauss_size - 1 - gy, gauss_size - 1 - gx)] = v; } } #ifndef NDEBUG uint64_t total = 0; for (index_t c = 0; c < k->size2; c++) { uint64_t oldtotal = total; total += k->gauss[c]; assert(total >= oldtotal); } #endif } static void setbit(struct ctx *k, index_t c) { if (k->calcmat[c]) return; k->calcmat[c] = true; uint64_t *m = k->gaussmat; uint64_t *me = k->gaussmat + k->size2; uint64_t *g = k->gauss + WRAP_SIZE2(k, k->gauss_middle + 
k->size2 - c); uint64_t *ge = k->gauss + k->size2; while (g < ge) *m++ += *g++; g = k->gauss; while (m < me) *m++ += *g++; } static index_t getmin(struct ctx *k) { uint64_t min = UINT64_MAX; index_t resnum = 0; unsigned int size2 = k->size2; for (index_t c = 0; c < size2; c++) { if (k->calcmat[c]) continue; uint64_t total = k->gaussmat[c]; if (total <= min) { if (total != min) { min = total; resnum = 0; } k->randomat[resnum++] = c; } } assert(resnum > 0); if (resnum == 1) return k->randomat[0]; if (resnum == size2) return size2 / 2; return k->randomat[rand() % resnum]; } static void makeuniform(struct ctx *k) { unsigned int size2 = k->size2; for (index_t c = 0; c < size2; c++) { index_t r = getmin(k); setbit(k, r); k->unimat[r] = c; } } void pl_generate_blue_noise(float *data, int size) { pl_assert(size > 0); int shift = PL_LOG2(size); pl_assert((1 << shift) == size); struct ctx *k = pl_zalloc_ptr(NULL, k); makegauss(k, shift); makeuniform(k); float invscale = k->size2; for(index_t y = 0; y < k->size; y++) { for(index_t x = 0; x < k->size; x++) data[x + y * k->size] = k->unimat[XY(k, x, y)] / invscale; } pl_free(k); } libplacebo-v4.192.1/src/dummy.c000066400000000000000000000251651417677245700162750ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include "gpu.h" const struct pl_gpu_dummy_params pl_gpu_dummy_default_params = { PL_GPU_DUMMY_DEFAULTS }; static const struct pl_gpu_fns pl_fns_dummy; struct priv { struct pl_gpu_fns impl; struct pl_gpu_dummy_params params; }; pl_gpu pl_gpu_dummy_create(pl_log log, const struct pl_gpu_dummy_params *params) { params = PL_DEF(params, &pl_gpu_dummy_default_params); struct pl_gpu *gpu = pl_zalloc_obj(NULL, gpu, struct priv); gpu->log = log; gpu->ctx = gpu->log; gpu->glsl = params->glsl; gpu->limits = params->limits; struct priv *p = PL_PRIV(gpu); p->impl = pl_fns_dummy; p->params = *params; // Forcibly override these, because we know for sure what the values are gpu->limits.align_tex_xfer_pitch = 1; gpu->limits.align_tex_xfer_offset = 1; gpu->limits.align_vertex_stride = 1; // Set up the dummy formats, add one for each possible format type that we // can represent on the host PL_ARRAY(pl_fmt) formats = {0}; for (enum pl_fmt_type type = 1; type < PL_FMT_TYPE_COUNT; type++) { for (int comps = 1; comps <= 4; comps++) { for (int depth = 8; depth < 128; depth *= 2) { if (type == PL_FMT_FLOAT && depth < 16) continue; static const char *cnames[] = { [1] = "r", [2] = "rg", [3] = "rgb", [4] = "rgba", }; static const char *tnames[] = { [PL_FMT_UNORM] = "", [PL_FMT_SNORM] = "s", [PL_FMT_UINT] = "u", [PL_FMT_SINT] = "i", [PL_FMT_FLOAT] = "f", }; const char *tname = tnames[type]; if (type == PL_FMT_FLOAT && depth == 16) tname = "hf"; struct pl_fmt *fmt = pl_alloc_ptr(gpu, fmt); *fmt = (struct pl_fmt) { .name = pl_asprintf(fmt, "%s%d%s", cnames[comps], depth, tname), .type = type, .num_components = comps, .opaque = false, .gatherable = true, .internal_size = comps * depth / 8, .texel_size = comps * depth / 8, .texel_align = 1, .caps = PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR | PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLENDABLE | PL_FMT_CAP_VERTEX | PL_FMT_CAP_HOST_READABLE, }; for (int i = 0; i < comps; i++) { fmt->component_depth[i] = depth; fmt->host_bits[i] = depth; fmt->sample_order[i] = i; } if (gpu->glsl.compute) fmt->caps |= PL_FMT_CAP_STORABLE; if (gpu->limits.max_buffer_texels && gpu->limits.max_ubo_size) fmt->caps |= PL_FMT_CAP_TEXEL_UNIFORM; if (gpu->limits.max_buffer_texels && gpu->limits.max_ssbo_size) fmt->caps |= PL_FMT_CAP_TEXEL_STORAGE; fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); fmt->glsl_format = pl_fmt_glsl_format(fmt, comps); fmt->fourcc = pl_fmt_fourcc(fmt); if (!fmt->glsl_format) fmt->caps &= ~(PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE); PL_ARRAY_APPEND(gpu, formats, fmt); } } } gpu->formats = formats.elem; gpu->num_formats = formats.num; return pl_gpu_finalize(gpu); } static void dumb_destroy(pl_gpu gpu) { pl_free((void *) gpu); } void pl_gpu_dummy_destroy(pl_gpu *gpu) { pl_gpu_destroy(*gpu); *gpu = NULL; } struct buf_priv { uint8_t *data; }; static pl_buf dumb_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { struct pl_buf *buf = pl_zalloc_obj(NULL, buf, struct buf_priv); buf->params = *params; buf->params.initial_data = NULL; struct buf_priv *p = PL_PRIV(buf); p->data = malloc(params->size); if (!p->data) { PL_ERR(gpu, "Failed allocating memory for dummy buffer!"); pl_free(buf); return NULL; } if (params->initial_data) memcpy(p->data, params->initial_data, params->size); if (params->host_mapped) buf->data = p->data; return buf; } static void dumb_buf_destroy(pl_gpu gpu, pl_buf buf) { struct buf_priv *p = PL_PRIV(buf); free(p->data); pl_free((void *) buf); } uint8_t *pl_buf_dummy_data(pl_buf buf) { struct buf_priv *p = 
PL_PRIV(buf); return p->data; } static void dumb_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset, const void *data, size_t size) { struct buf_priv *p = PL_PRIV(buf); memcpy(p->data + buf_offset, data, size); } static bool dumb_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset, void *dest, size_t size) { struct buf_priv *p = PL_PRIV(buf); memcpy(dest, p->data + buf_offset, size); return true; } static void dumb_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { struct buf_priv *dstp = PL_PRIV(dst); struct buf_priv *srcp = PL_PRIV(src); memcpy(dstp->data + dst_offset, srcp->data + src_offset, size); } struct tex_priv { void *data; }; static size_t tex_size(pl_gpu gpu, pl_tex tex) { size_t size = tex->params.format->texel_size * tex->params.w; size *= PL_DEF(tex->params.h, 1); size *= PL_DEF(tex->params.d, 1); return size; } static pl_tex dumb_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { struct pl_tex *tex = pl_zalloc_obj(NULL, tex, void *); tex->params = *params; tex->params.initial_data = NULL; struct tex_priv *p = PL_PRIV(tex); p->data = malloc(tex_size(gpu, tex)); if (!p->data) { PL_ERR(gpu, "Failed allocating memory for dummy texture!"); pl_free(tex); return NULL; } if (params->initial_data) memcpy(p->data, params->initial_data, tex_size(gpu, tex)); return tex; } pl_tex pl_tex_dummy_create(pl_gpu gpu, const struct pl_tex_dummy_params *params) { // Only do minimal sanity checking, since this is just a dummy texture pl_assert(params->format && params->w >= 0 && params->h >= 0 && params->d >= 0); struct pl_tex *tex = pl_zalloc_obj(NULL, tex, struct tex_priv); tex->sampler_type = params->sampler_type; tex->params = (struct pl_tex_params) { .w = params->w, .h = params->h, .d = params->d, .format = params->format, .sampleable = true, .user_data = params->user_data, }; return tex; } static void dumb_tex_destroy(pl_gpu gpu, pl_tex tex) { struct tex_priv *p = PL_PRIV(tex); if (p->data) free(p->data); pl_free((void *) tex); } uint8_t *pl_tex_dummy_data(pl_tex tex) { struct tex_priv *p = PL_PRIV(tex); return p->data; } static bool dumb_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; struct tex_priv *p = PL_PRIV(tex); pl_assert(p->data); const uint8_t *src = params->ptr; uint8_t *dst = p->data; if (params->buf) { struct buf_priv *bufp = PL_PRIV(params->buf); src = (uint8_t *) bufp->data + params->buf_offset; } size_t texel_size = tex->params.format->texel_size; size_t row_size = pl_rect_w(params->rc) * texel_size; for (int z = params->rc.z0; z < params->rc.z1; z++) { size_t src_plane = z * params->depth_pitch; size_t dst_plane = z * tex->params.h * tex->params.w * texel_size; for (int y = params->rc.y0; y < params->rc.y1; y++) { size_t src_row = src_plane + y * params->row_pitch; size_t dst_row = dst_plane + y * tex->params.w * texel_size; size_t pos = params->rc.x0 * texel_size; memcpy(&dst[dst_row + pos], &src[src_row + pos], row_size); } } return true; } static bool dumb_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; struct tex_priv *p = PL_PRIV(tex); pl_assert(p->data); const uint8_t *src = p->data; uint8_t *dst = params->ptr; if (params->buf) { struct buf_priv *bufp = PL_PRIV(params->buf); dst = (uint8_t *) bufp->data + params->buf_offset; } size_t texel_size = tex->params.format->texel_size; size_t row_size = pl_rect_w(params->rc) * texel_size; for (int z = params->rc.z0; z < params->rc.z1; z++) { size_t src_plane = z * 
tex->params.h * tex->params.w * texel_size; size_t dst_plane = z * params->depth_pitch; for (int y = params->rc.y0; y < params->rc.y1; y++) { size_t src_row = src_plane + y * tex->params.w * texel_size; size_t dst_row = dst_plane + y * params->row_pitch; size_t pos = params->rc.x0 * texel_size; memcpy(&dst[dst_row + pos], &src[src_row + pos], row_size); } } return true; } static int dumb_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { return 0; // safest behavior: never alias bindings } static pl_pass dumb_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { PL_ERR(gpu, "Creating render passes is not supported for dummy GPUs"); return NULL; } static void dumb_gpu_finish(pl_gpu gpu) { // no-op } static const struct pl_gpu_fns pl_fns_dummy = { .destroy = dumb_destroy, .buf_create = dumb_buf_create, .buf_destroy = dumb_buf_destroy, .buf_write = dumb_buf_write, .buf_read = dumb_buf_read, .buf_copy = dumb_buf_copy, .tex_create = dumb_tex_create, .tex_destroy = dumb_tex_destroy, .tex_upload = dumb_tex_upload, .tex_download = dumb_tex_download, .desc_namespace = dumb_desc_namespace, .pass_create = dumb_pass_create, .gpu_finish = dumb_gpu_finish, }; libplacebo-v4.192.1/src/filters.c000066400000000000000000000476411417677245700166150ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ /* * Some of the filter code originally derives (via mpv) from Glumpy: * # Copyright (c) 2009-2016 Nicolas P. Rougier. All rights reserved. * # Distributed under the (new) BSD License. * (https://github.com/glumpy/glumpy/blob/master/glumpy/library/build-spatial-filters.py) * * The math underlying each filter function was written from scratch, with * some algorithms coming from a number of different sources, including: * - https://en.wikipedia.org/wiki/Window_function * - https://en.wikipedia.org/wiki/Jinc * - http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h * - Vapoursynth plugin fmtconv (WTFPL Licensed), which is based on * dither plugin for avisynth from the same author: * https://github.com/vapoursynth/fmtconv/tree/master/src/fmtc * - Paul Heckbert's "zoom" * - XBMC: ConvolutionKernels.cpp etc. 
* - https://github.com/AviSynth/jinc-resize (only used to verify the math) */ #include #include "common.h" #include "filters.h" #include "log.h" bool pl_filter_function_eq(const struct pl_filter_function *a, const struct pl_filter_function *b) { if (!a || !b) return a == b; bool r = a->resizable == b->resizable && a->weight == b->weight && a->radius == b->radius; for (int i = 0; i < PL_FILTER_MAX_PARAMS; i++) { r &= a->tunable[i] == b->tunable[i]; if (a->tunable[i]) r &= a->params[i] == b->params[i]; } return r; } bool pl_filter_config_eq(const struct pl_filter_config *a, const struct pl_filter_config *b) { if (!a || !b) return a == b; return pl_filter_function_eq(a->kernel, b->kernel) && pl_filter_function_eq(a->window, b->window) && a->clamp == b->clamp && a->blur == b->blur && a->taper == b->taper && a->polar == b->polar; } double pl_filter_sample(const struct pl_filter_config *c, double x) { double radius = c->kernel->radius; // All filters are symmetric, and in particular only need to be defined // for [0, radius]. x = fabs(x); // Apply the blur and taper coefficients as needed double kx = c->blur > 0.0 ? x / c->blur : x; kx = kx <= c->taper ? 0.0 : (kx - c->taper) / (1.0 - c->taper / radius); // Return early for values outside of the kernel radius, since the functions // are not necessarily valid outside of this interval. No such check is // needed for the window, because it's always stretched to fit. if (kx > radius) return 0.0; double k = c->kernel->weight(c->kernel, kx); // Apply the optional windowing function if (c->window) k *= c->window->weight(c->window, x / radius * c->window->radius); return k < 0 ? (1 - c->clamp) * k : k; } // Compute a single row of weights for a given filter in one dimension, indexed // by the indicated subpixel offset. Writes `f->row_size` values to `out`. static void compute_row(struct pl_filter *f, double offset, float *out) { double wsum = 0.0; for (int i = 0; i < f->row_size; i++) { // For the example of a filter with row size 4 and offset 0.3, we have: // // 0 1 * 2 3 // // * indicates the sampled position. What we want to compute is the // distance from each index to that sampled position. pl_assert(f->row_size % 2 == 0); const int base = f->row_size / 2 - 1; // index to the left of the center const double center = base + offset; // offset of center relative to idx 0 double x = i - center; // Stretch/squish the kernel by readjusting the value range x *= f->params.config.kernel->radius / f->radius; double w = pl_filter_sample(&f->params.config, x); out[i] = w; wsum += w; } // Readjust weights to preserve energy pl_assert(wsum > 0); for (int i = 0; i < f->row_size; i++) out[i] /= wsum; } static struct pl_filter_function *dupfilter(void *alloc, const struct pl_filter_function *f) { return f ? 
pl_memdup(alloc, (void *)f, sizeof(*f)) : NULL; } pl_filter pl_filter_generate(pl_log log, const struct pl_filter_params *params) { pl_assert(params); if (params->lut_entries <= 0 || !params->config.kernel) { pl_fatal(log, "Invalid params: missing lut_entries or config.kernel"); return NULL; } struct pl_filter *f = pl_zalloc_ptr(NULL, f); f->params = *params; f->params.config.kernel = dupfilter(f, params->config.kernel); f->params.config.window = dupfilter(f, params->config.window); // Compute the required filter radius float radius = f->params.config.kernel->radius; f->radius = radius; if (params->filter_scale > 1.0) f->radius *= params->filter_scale; float *weights; if (params->config.polar) { // Compute a 1D array indexed by radius weights = pl_alloc(f, params->lut_entries * sizeof(float)); f->radius_cutoff = 0.0; for (int i = 0; i < params->lut_entries; i++) { double x = radius * i / (params->lut_entries - 1); weights[i] = pl_filter_sample(&f->params.config, x); if (fabs(weights[i]) > params->cutoff) f->radius_cutoff = x; } } else { // Pick the most appropriate row size f->row_size = ceil(f->radius) * 2; if (params->max_row_size && f->row_size > params->max_row_size) { pl_info(log, "Required filter size %d exceeds the maximum allowed " "size of %d. This may result in adverse effects (aliasing, " "or moiré artifacts).", f->row_size, params->max_row_size); f->row_size = params->max_row_size; f->insufficient = true; } f->row_stride = PL_ALIGN(f->row_size, params->row_stride_align); // Compute a 2D array indexed by the subpixel position weights = pl_calloc(f, params->lut_entries * f->row_stride, sizeof(float)); for (int i = 0; i < params->lut_entries; i++) { compute_row(f, i / (double)(params->lut_entries - 1), weights + f->row_stride * i); } } f->weights = weights; return f; } void pl_filter_free(pl_filter *filter) { pl_free_ptr((void **) filter); } const struct pl_filter_function_preset *pl_find_filter_function_preset(const char *name) { if (!name) return NULL; for (int i = 0; pl_filter_function_presets[i].name; i++) { if (strcmp(pl_filter_function_presets[i].name, name) == 0) return &pl_filter_function_presets[i]; } return NULL; } const struct pl_filter_preset *pl_find_filter_preset(const char *name) { if (!name) return NULL; for (int i = 0; pl_filter_presets[i].name; i++) { if (strcmp(pl_filter_presets[i].name, name) == 0) return &pl_filter_presets[i]; } return NULL; } // Built-in filter functions static double box(const struct pl_filter_function *f, double x) { return x < 0.5 ? 
1.0 : 0.0; } const struct pl_filter_function pl_filter_function_box = { .weight = box, .radius = 1.0, }; static double triangle(const struct pl_filter_function *f, double x) { return 1.0 - x / f->radius; } const struct pl_filter_function pl_filter_function_triangle = { .resizable = true, .weight = triangle, .radius = 1.0, }; static double cosine(const struct pl_filter_function *f, double x) { return cos(x); } const struct pl_filter_function pl_filter_function_cosine = { .weight = cosine, .radius = M_PI / 2.0, }; static double hann(const struct pl_filter_function *f, double x) { return 0.5 + 0.5 * cos(M_PI * x); } const struct pl_filter_function pl_filter_function_hann = { .weight = hann, .radius = 1.0, }; static double hamming(const struct pl_filter_function *f, double x) { return 0.54 + 0.46 * cos(M_PI * x); } const struct pl_filter_function pl_filter_function_hamming = { .weight = hamming, .radius = 1.0, }; static double welch(const struct pl_filter_function *f, double x) { return 1.0 - x * x; } const struct pl_filter_function pl_filter_function_welch = { .weight = welch, .radius = 1.0, }; static double bessel_i0(double x) { double s = 1.0; double y = x * x / 4.0; double t = y; int i = 2; while (t > 1e-12) { s += t; t *= y / (i * i); i += 1; } return s; } static double kaiser(const struct pl_filter_function *f, double x) { double alpha = fmax(f->params[0], 0.0); return bessel_i0(alpha * sqrt(1.0 - x * x)) / alpha; } const struct pl_filter_function pl_filter_function_kaiser = { .tunable = {true}, .weight = kaiser, .radius = 1.0, .params = {2.0}, }; static double blackman(const struct pl_filter_function *f, double x) { double a = f->params[0]; double a0 = (1 - a) / 2.0, a1 = 1 / 2.0, a2 = a / 2.0; x *= M_PI; return a0 + a1 * cos(x) + a2 * cos(2 * x); } const struct pl_filter_function pl_filter_function_blackman = { .tunable = {true}, .weight = blackman, .radius = 1.0, .params = {0.16}, }; static double bohman(const struct pl_filter_function *f, double x) { double pix = M_PI * x; return (1.0 - x) * cos(pix) + sin(pix) / M_PI; } const struct pl_filter_function pl_filter_function_bohman = { .weight = bohman, .radius = 1.0, }; static double gaussian(const struct pl_filter_function *f, double x) { return exp(-2.0 * x * x / f->params[0]); } const struct pl_filter_function pl_filter_function_gaussian = { .resizable = true, .tunable = {true}, .weight = gaussian, .radius = 2.0, .params = {1.0}, }; static double quadratic(const struct pl_filter_function *f, double x) { if (x < 0.5) { return 0.75 - x * x; } else { return 0.5 * (x - 1.5) * (x - 1.5); } } const struct pl_filter_function pl_filter_function_quadratic = { .weight = quadratic, .radius = 1.5, }; static double sinc(const struct pl_filter_function *f, double x) { if (x < 1e-8) return 1.0; x *= M_PI; return sin(x) / x; } const struct pl_filter_function pl_filter_function_sinc = { .resizable = true, .weight = sinc, .radius = 1.0, }; static double jinc(const struct pl_filter_function *f, double x) { if (x < 1e-8) return 1.0; x *= M_PI; return 2.0 * j1(x) / x; } const struct pl_filter_function pl_filter_function_jinc = { .resizable = true, .weight = jinc, .radius = 1.2196698912665045, // first zero }; static double sphinx(const struct pl_filter_function *f, double x) { if (x < 1e-8) return 1.0; x *= M_PI; return 3.0 * (sin(x) - x * cos(x)) / (x * x * x); } const struct pl_filter_function pl_filter_function_sphinx = { .resizable = true, .weight = sphinx, .radius = 1.4302966531242027, // first zero }; static double bcspline(const struct 
pl_filter_function *f, double x) { double b = f->params[0], c = f->params[1]; double p0 = (6.0 - 2.0 * b) / 6.0, p2 = (-18.0 + 12.0 * b + 6.0 * c) / 6.0, p3 = (12.0 - 9.0 * b - 6.0 * c) / 6.0, q0 = (8.0 * b + 24.0 * c) / 6.0, q1 = (-12.0 * b - 48.0 * c) / 6.0, q2 = (6.0 * b + 30.0 * c) / 6.0, q3 = (-b - 6.0 * c) / 6.0; // Needed to ensure the kernel is sanely scaled, i.e. bcspline(0.0) = 1.0 double scale = 1.0 / p0; if (x < 1.0) { return scale * (p0 + x * x * (p2 + x * p3)); } else if (x < 2.0) { return scale * (q0 + x * (q1 + x * (q2 + x * q3))); } return 0.0; } const struct pl_filter_function pl_filter_function_bcspline = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {0.5, 0.5}, }; const struct pl_filter_function pl_filter_function_catmull_rom = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {0.0, 0.5}, }; const struct pl_filter_function pl_filter_function_mitchell = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {1/3.0, 1/3.0}, }; const struct pl_filter_function pl_filter_function_robidoux = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {12 / (19 + 9 * M_SQRT2), 113 / (58 + 216 * M_SQRT2)}, }; const struct pl_filter_function pl_filter_function_robidouxsharp = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {6 / (13 + 7 * M_SQRT2), 7 / (2 + 12 * M_SQRT2)}, }; #define POW3(x) ((x) <= 0 ? 0 : (x) * (x) * (x)) static double bicubic(const struct pl_filter_function *f, double x) { return (1.0/6.0) * ( 1 * POW3(x + 2) - 4 * POW3(x + 1) + 6 * POW3(x + 0) - 4 * POW3(x - 1)); } const struct pl_filter_function pl_filter_function_bicubic = { .weight = bicubic, .radius = 2.0, }; static double spline16(const struct pl_filter_function *f, double x) { if (x < 1.0) { return ((x - 9.0/5.0 ) * x - 1.0/5.0 ) * x + 1.0; } else { return ((-1.0/3.0 * (x-1) + 4.0/5.0) * (x-1) - 7.0/15.0 ) * (x-1); } } const struct pl_filter_function pl_filter_function_spline16 = { .weight = spline16, .radius = 2.0, }; static double spline36(const struct pl_filter_function *f, double x) { if (x < 1.0) { return ((13.0/11.0 * x - 453.0/209.0) * x - 3.0/209.0) * x + 1.0; } else if (x < 2.0) { return ((-6.0/11.0 * (x-1) + 270.0/209.0) * (x-1) - 156.0/ 209.0) * (x-1); } else { return ((1.0/11.0 * (x-2) - 45.0/209.0) * (x-2) + 26.0/209.0) * (x-2); } } const struct pl_filter_function pl_filter_function_spline36 = { .weight = spline36, .radius = 3.0, }; static double spline64(const struct pl_filter_function *f, double x) { if (x < 1.0) { return ((49.0/41.0 * x - 6387.0/2911.0) * x - 3.0/2911.0) * x + 1.0; } else if (x < 2.0) { return ((-24.0/41.0 * (x-1) + 4032.0/2911.0) * (x-1) - 2328.0/2911.0) * (x-1); } else if (x < 3.0) { return ((6.0/41.0 * (x-2) - 1008.0/2911.0) * (x-2) + 582.0/2911.0) * (x-2); } else { return ((-1.0/41.0 * (x-3) + 168.0/2911.0) * (x-3) - 97.0/2911.0) * (x-3); } } const struct pl_filter_function pl_filter_function_spline64 = { .weight = spline64, .radius = 4.0, }; // Named filter functions const struct pl_filter_function_preset pl_filter_function_presets[] = { {"none", NULL}, {"box", &pl_filter_function_box}, {"dirichlet", &pl_filter_function_box}, // alias {"triangle", &pl_filter_function_triangle}, {"cosine", &pl_filter_function_cosine}, {"hann", &pl_filter_function_hann}, {"hanning", &pl_filter_function_hann}, // alias {"hamming", &pl_filter_function_hamming}, {"welch", &pl_filter_function_welch}, {"kaiser", &pl_filter_function_kaiser}, {"blackman", &pl_filter_function_blackman}, 
{"bohman", &pl_filter_function_bohman}, {"gaussian", &pl_filter_function_gaussian}, {"quadratic", &pl_filter_function_quadratic}, {"quadric", &pl_filter_function_quadratic}, // alias {"sinc", &pl_filter_function_sinc}, {"jinc", &pl_filter_function_jinc}, {"sphinx", &pl_filter_function_sphinx}, {"bcspline", &pl_filter_function_bcspline}, {"hermite", &pl_filter_function_bcspline}, // alias {"catmull_rom", &pl_filter_function_catmull_rom}, {"mitchell", &pl_filter_function_mitchell}, {"robidoux", &pl_filter_function_robidoux}, {"robidouxsharp", &pl_filter_function_robidouxsharp}, {"bicubic", &pl_filter_function_bicubic}, {"spline16", &pl_filter_function_spline16}, {"spline36", &pl_filter_function_spline36}, {"spline64", &pl_filter_function_spline64}, {0}, }; const int pl_num_filter_function_presets = PL_ARRAY_SIZE(pl_filter_function_presets) - 1; // Built-in filter function presets const struct pl_filter_config pl_filter_spline16 = { .kernel = &pl_filter_function_spline16, }; const struct pl_filter_config pl_filter_spline36 = { .kernel = &pl_filter_function_spline36, }; const struct pl_filter_config pl_filter_spline64 = { .kernel = &pl_filter_function_spline64, }; const struct pl_filter_config pl_filter_nearest = { .kernel = &pl_filter_function_box, }; const struct pl_filter_config pl_filter_bilinear = { .kernel = &pl_filter_function_triangle, }; const struct pl_filter_config pl_filter_gaussian = { .kernel = &pl_filter_function_gaussian, }; // Sinc configured to three taps static const struct pl_filter_function sinc3 = { .resizable = true, .weight = sinc, .radius = 3.0, }; const struct pl_filter_config pl_filter_sinc = { .kernel = &sinc3, }; const struct pl_filter_config pl_filter_lanczos = { .kernel = &sinc3, .window = &pl_filter_function_sinc, }; const struct pl_filter_config pl_filter_ginseng = { .kernel = &sinc3, .window = &pl_filter_function_jinc, }; // Jinc configured to three taps static const struct pl_filter_function jinc3 = { .resizable = true, .weight = jinc, .radius = 3.2383154841662362, // third zero }; const struct pl_filter_config pl_filter_ewa_jinc = { .kernel = &jinc3, .polar = true, }; const struct pl_filter_config pl_filter_ewa_lanczos = { .kernel = &jinc3, .window = &pl_filter_function_jinc, .polar = true, }; const struct pl_filter_config pl_filter_ewa_ginseng = { .kernel = &jinc3, .window = &pl_filter_function_sinc, .polar = true, }; const struct pl_filter_config pl_filter_ewa_hann = { .kernel = &jinc3, .window = &pl_filter_function_hann, .polar = true, }; const struct pl_filter_config pl_filter_haasnsoft = { .kernel = &jinc3, .window = &pl_filter_function_hann, // The blur is tuned to equal out orthogonal and diagonal contributions // on a regular grid. This has the effect of almost completely killing // aliasing. 
.blur = 1.11, .polar = true, }; // Spline family const struct pl_filter_config pl_filter_bicubic = { .kernel = &pl_filter_function_bicubic, }; const struct pl_filter_config pl_filter_catmull_rom = { .kernel = &pl_filter_function_catmull_rom, }; const struct pl_filter_config pl_filter_mitchell = { .kernel = &pl_filter_function_mitchell, }; const struct pl_filter_config pl_filter_mitchell_clamp = { .kernel = &pl_filter_function_mitchell, .clamp = 1.0, }; const struct pl_filter_config pl_filter_robidoux = { .kernel = &pl_filter_function_robidoux, }; const struct pl_filter_config pl_filter_robidouxsharp = { .kernel = &pl_filter_function_robidouxsharp, }; const struct pl_filter_config pl_filter_ewa_robidoux = { .kernel = &pl_filter_function_robidoux, .polar = true, }; const struct pl_filter_config pl_filter_ewa_robidouxsharp = { .kernel = &pl_filter_function_robidouxsharp, .polar = true, }; // Named filter configs const struct pl_filter_preset pl_filter_presets[] = { {"none", NULL, "Built-in sampling"}, COMMON_FILTER_PRESETS, {0} }; const int pl_num_filter_presets = PL_ARRAY_SIZE(pl_filter_presets) - 1; libplacebo-v4.192.1/src/filters.h000066400000000000000000000056531417677245700166170ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #define COMMON_FILTER_PRESETS \ /* Highest priority / recommended filters */ \ {"bilinear", &pl_filter_bilinear, "Bilinear"}, \ {"nearest", &pl_filter_nearest, "Nearest neighbour"}, \ {"bicubic", &pl_filter_bicubic, "Bicubic"}, \ {"lanczos", &pl_filter_lanczos, "Lanczos"}, \ {"ewa_lanczos", &pl_filter_ewa_lanczos, "Jinc (EWA Lanczos)"}, \ {"gaussian", &pl_filter_gaussian, "Gaussian"}, \ {"spline16", &pl_filter_spline16, "Spline (2 taps)"}, \ {"spline36", &pl_filter_spline36, "Spline (3 taps)"}, \ {"spline64", &pl_filter_spline64, "Spline (4 taps)"}, \ {"mitchell", &pl_filter_mitchell, "Mitchell-Netravali"}, \ \ /* Remaining filters */ \ {"sinc", &pl_filter_sinc, "Sinc (unwindowed)"}, \ {"ginseng", &pl_filter_ginseng, "Ginseng (Jinc-Sinc)"}, \ {"ewa_jinc", &pl_filter_ewa_jinc, "EWA Jinc (unwindowed)"}, \ {"ewa_ginseng", &pl_filter_ewa_ginseng, "EWA Ginseng"}, \ {"ewa_hann", &pl_filter_ewa_hann, "EWA Hann"}, \ {"catmull_rom", &pl_filter_catmull_rom, "Catmull-Rom"}, \ {"robidoux", &pl_filter_robidoux, "Robidoux"}, \ {"robidouxsharp", &pl_filter_robidouxsharp, "RobidouxSharp"}, \ {"ewa_robidoux", &pl_filter_ewa_robidoux, "EWA Robidoux"}, \ {"ewa_robidouxsharp", &pl_filter_ewa_robidouxsharp, "EWA RobidouxSharp"}, \ \ /* Aliases */ \ {"triangle", &pl_filter_bilinear}, \ {"ewa_hanning", &pl_filter_ewa_hann} libplacebo-v4.192.1/src/format.c000066400000000000000000000403471417677245700164310ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "common.h" static int ccStrPrintInt32( char *str, int32_t n ); static int ccStrPrintUint32( char *str, uint32_t n ); static int ccStrPrintInt64( char *str, int64_t n ); static int ccStrPrintUint64( char *str, uint64_t n ); static int ccStrPrintDouble( char *str, int bufsize, int decimals, double value ); static int ccSeqParseInt64( char *seq, int seqlength, int64_t *retint ); static int ccSeqParseDouble( char *seq, int seqlength, double *retdouble ); void pl_str_append_asprintf_c(void *alloc, pl_str *str, const char *fmt, ...) { va_list ap; va_start(ap, fmt); pl_str_append_vasprintf_c(alloc, str, fmt, ap); va_end(ap); } void pl_str_append_vasprintf_c(void *alloc, pl_str *str, const char *fmt, va_list ap) { for (const char *c; (c = strchr(fmt, '%')) != NULL; fmt = c + 1) { // Append the preceding string literal pl_str_append(alloc, str, (pl_str) { (uint8_t *) fmt, c - fmt }); c++; // skip '%' char buf[32]; int len; // The format character follows the % sign switch (c[0]) { case '%': pl_str_append(alloc, str, pl_str0("%")); continue; case 'c': buf[0] = (char) va_arg(ap, int); pl_str_append(alloc, str, (pl_str) { (uint8_t *) buf, 1 }); continue; case 's': { const char *arg = va_arg(ap, const char *); pl_str_append(alloc, str, pl_str0(arg)); continue; } case '.': { // only used for %.*s assert(c[1] == '*'); assert(c[2] == 's'); pl_str arg; arg.len = va_arg(ap, int); arg.buf = (uint8_t *) va_arg(ap, char *); pl_str_append(alloc, str, arg); c += 2; // skip '*s' continue; } case 'd': len = ccStrPrintInt32(buf, va_arg(ap, int)); pl_str_append(alloc, str, (pl_str) { (uint8_t *) buf, len }); continue; case 'u': len = ccStrPrintUint32(buf, va_arg(ap, unsigned int)); pl_str_append(alloc, str, (pl_str) { (uint8_t *) buf, len }); continue; case 'l': assert(c[1] == 'l'); switch (c[2]) { case 'u': len = ccStrPrintUint64(buf, va_arg(ap, unsigned long long)); break; case 'd': len = ccStrPrintInt64(buf, va_arg(ap, long long)); break; default: abort(); } pl_str_append(alloc, str, (pl_str) { (uint8_t *) buf, len }); c += 2; continue; case 'z': assert(c[1] == 'u'); len = ccStrPrintUint64(buf, va_arg(ap, size_t)); pl_str_append(alloc, str, (pl_str) { (uint8_t *) buf, len }); c++; continue; case 'f': len = ccStrPrintDouble(buf, sizeof(buf), 20, va_arg(ap, double)); pl_str_append(alloc, str, (pl_str) { (uint8_t *) buf, len }); continue; default: fprintf(stderr, "Invalid conversion character: '%c'!\n", c[0]); abort(); } } // Append the remaining string literal pl_str_append(alloc, str, pl_str0(fmt)); } bool pl_str_parse_double(pl_str str, double *out) { return ccSeqParseDouble((char *) str.buf, str.len, out); } bool pl_str_parse_int64(pl_str str, int64_t *out) { return ccSeqParseInt64((char *) str.buf, str.len, out); } /* ***************************************************************************** * * Copyright (c) 2007-2016 Alexis Naveros. 
* Modified for use with libplacebo by Niklas Haas * Changes include: * - Removed a CC_MIN macro dependency by equivalent logic * - Removed CC_ALWAYSINLINE * - Fixed (!seq) check to (!seqlength) * - Added support for scientific notation (e.g. 1.0e10) in ccSeqParseDouble * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: * * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software * in a product, an acknowledgment in the product documentation would be * appreciated but is not required. * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. * 3. This notice may not be removed or altered from any source distribution. * * ----------------------------------------------------------------------------- */ static const char ccStrPrintDecimalTable[201] = { "00010203040506070809" "10111213141516171819" "20212223242526272829" "30313233343536373839" "40414243444546474849" "50515253545556575859" "60616263646566676869" "70717273747576777879" "80818283848586878889" "90919293949596979899" }; static inline int ccStrPrintLength32( uint32_t n ) { int size; if( n >= 10000 ) { if( n >= 10000000 ) { if( n >= 1000000000 ) size = 10; else if( n >= 100000000 ) size = 9; else size = 8; } else if( n >= 1000000 ) size = 7; else if( n >= 100000 ) size = 6; else size = 5; } else { if( n >= 100 ) { if( n >= 1000 ) size = 4; else size = 3; } else if( n >= 10 ) size = 2; else size = 1; } return size; } static inline int ccStrPrintLength64( uint64_t n ) { int size; if( n >= 10000 ) { if( n >= 10000000 ) { if( n >= 10000000000LL ) { if( n >= 10000000000000LL ) { if( n >= 10000000000000000LL ) { if( n >= 10000000000000000000ULL ) size = 20; else if( n >= 1000000000000000000LL ) size = 19; else if( n >= 100000000000000000LL ) size = 18; else size = 17; } else if( n >= 1000000000000000LL ) size = 16; else if( n >= 100000000000000LL ) size = 15; else size = 14; } else if( n >= 1000000000000LL ) size = 13; else if( n >= 100000000000LL ) size = 12; else size = 11; } else if( n >= 1000000000 ) size = 10; else if( n >= 100000000 ) size = 9; else size = 8; } else { if( n >= 1000000 ) size = 7; else if( n >= 100000 ) size = 6; else size = 5; } } else if( n >= 100 ) { if( n >= 1000 ) size = 4; else size = 3; } else if( n >= 10 ) size = 2; else size = 1; return size; } static int ccStrPrintInt32( char *str, int32_t n ) { int sign, size, retsize, pos; uint32_t val32; const char *src; if( n == 0 ) { str[0] = '0'; str[1] = 0; return 1; } sign = -( n < 0 ); val32 = ( n ^ sign ) - sign; size = ccStrPrintLength32( val32 ); if( sign ) { size++; str[0] = '-'; } retsize = size; str[size] = 0; str += size - 1; while( val32 >= 100 ) { pos = val32 % 100; val32 /= 100; src = &ccStrPrintDecimalTable[ pos << 1 ]; str[-1] = src[0]; str[0] = src[1]; str -= 2; } while( val32 > 0 ) { *str-- = '0' + ( val32 % 10 ); val32 /= 10; } return retsize; } static int ccStrPrintUint32( char *str, uint32_t n ) { int size, retsize, pos; uint32_t val32; const char *src; if( n == 0 ) { str[0] = '0'; str[1] = 0; return 1; } val32 = n; size = ccStrPrintLength32( val32 ); retsize = size; str[size] = 0; str += size - 1; while( val32 >= 100 ) { pos = val32 % 100; val32 /= 100; src = &ccStrPrintDecimalTable[ pos << 1 ]; str[-1] = src[0]; str[0] = src[1]; str -= 2; } while( val32 > 
0 ) { *str-- = '0' + ( val32 % 10 ); val32 /= 10; } return retsize; } static int ccStrPrintInt64( char *str, int64_t n ) { int sign, size, retsize, pos; uint64_t val64; const char *src; if( n == 0 ) { str[0] = '0'; str[1] = 0; return 1; } sign = -( n < 0 ); val64 = ( n ^ sign ) - sign; size = ccStrPrintLength64( val64 ); if( sign ) { size++; str[0] = '-'; } retsize = size; str[size] = 0; str += size - 1; while( val64 >= 100 ) { pos = val64 % 100; val64 /= 100; src = &ccStrPrintDecimalTable[ pos << 1 ]; str[-1] = src[0]; str[0] = src[1]; str -= 2; } while( val64 > 0 ) { *str-- = '0' + ( val64 % 10 ); val64 /= 10; } return retsize; } static int ccStrPrintUint64( char *str, uint64_t n ) { int size, retsize, pos; uint64_t val64; const char *src; if( n == 0 ) { str[0] = '0'; str[1] = 0; return 1; } val64 = n; size = ccStrPrintLength64( val64 ); retsize = size; str[size] = 0; str += size - 1; while( val64 >= 100 ) { pos = val64 % 100; val64 /= 100; src = &ccStrPrintDecimalTable[ pos << 1 ]; str[-1] = src[0]; str[0] = src[1]; str -= 2; } while( val64 > 0 ) { *str-- = '0' + ( val64 % 10 ); val64 /= 10; } return retsize; } #define CC_STR_PRINT_BUFSIZE_INT32 (12) #define CC_STR_PRINT_BUFSIZE_UINT32 (11) #define CC_STR_PRINT_BUFSIZE_INT64 (21) #define CC_STR_PRINT_BUFSIZE_UINT64 (20) #define CC_STR_PRINT_DOUBLE_MAX_DECIMAL (24) static const double ccStrPrintBiasTable[CC_STR_PRINT_DOUBLE_MAX_DECIMAL+1] = { 0.5, 0.05, 0.005, 0.0005, 0.00005, 0.000005, 0.0000005, 0.00000005, 0.000000005, 0.0000000005, 0.00000000005, 0.000000000005, 0.0000000000005, 0.00000000000005, 0.000000000000005, 0.0000000000000005, 0.00000000000000005, 0.000000000000000005, 0.0000000000000000005, 0.00000000000000000005, 0.000000000000000000005, 0.0000000000000000000005, 0.00000000000000000000005, 0.000000000000000000000005, 0.0000000000000000000000005 }; static int ccStrPrintDouble( char *str, int bufsize, int decimals, double value ) { int size, offset, index; int32_t frac, accumsub; double muldec; uint32_t u32; uint64_t u64; size = 0; if( value < 0.0 ) { size = 1; *str++ = '-'; bufsize--; value = -value; } /* Add bias matching the count of desired decimals in order to round the right way */ if( decimals > CC_STR_PRINT_DOUBLE_MAX_DECIMAL ) decimals = CC_STR_PRINT_DOUBLE_MAX_DECIMAL; value += ccStrPrintBiasTable[decimals]; if( value < 4294967296.0 ) { if( bufsize < CC_STR_PRINT_BUFSIZE_UINT32 ) goto error; u32 = (int32_t)value; offset = ccStrPrintUint32( str, u32 ); size += offset; bufsize -= size; value -= (double)u32; } else if( value < 18446744073709551616.0 ) { if( bufsize < CC_STR_PRINT_BUFSIZE_UINT64 ) goto error; u64 = (int64_t)value; offset = ccStrPrintUint64( str, u64 ); size += offset; bufsize -= size; value -= (double)u64; } else goto error; if (decimals > bufsize - 2) decimals = bufsize - 2; if( decimals <= 0 ) return size; str[offset] = '.'; muldec = 10.0; accumsub = 0; str += offset + 1; for( index = 0 ; index < decimals ; index++ ) { frac = (int32_t)( value * muldec ) - accumsub; frac = PL_CLAMP(frac, 0, 9); // FIXME: why is this needed? 
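/* Likely reason for the clamp above (re: the FIXME): `value * muldec` is
 * evaluated in floating point, so accumulated rounding error can push the
 * truncated result one step outside the expected digit range relative to
 * `accumsub`; clamping to [0, 9] guarantees a valid decimal digit is emitted.
 * The `(accumsub << 3) + (accumsub << 1)` a few statements below is simply a
 * strength-reduced `accumsub *= 10`, rescaling the running subtraction to the
 * next decimal position. */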
str[index] = '0' + (char)frac; accumsub += frac; accumsub = ( accumsub << 3 ) + ( accumsub << 1 ); if( muldec < 10000000 ) muldec *= 10.0; else { value *= 10000000.0; value -= (int32_t)value; muldec = 10.0; accumsub = 0; } } str[ index ] = 0; size += index + 1; return size; error: if( bufsize < 4 ) *str = 0; else { str[0] = 'E'; str[1] = 'R'; str[2] = 'R'; str[3] = 0; } return 0; } #define CC_CHAR_IS_DELIMITER(c) ((c)<=' ') static int ccSeqParseInt64( char *seq, int seqlength, int64_t *retint ) { int i, negflag; char c; int64_t workint; *retint = 0; if( !( seqlength ) ) return 0; negflag = 0; i = 0; if( *seq == '-' ) { negflag = 1; i = 1; } else if( *seq == '+' ) i = 1; workint = 0; for( ; i < seqlength ; i++ ) { c = seq[i]; if( ( c >= '0' ) && ( c <= '9' ) ) { if( workint >= (int64_t)0xcccccccccccccccLL ) return 0; workint = ( workint * 10 ) + ( c - '0' ); } else if( CC_CHAR_IS_DELIMITER( c ) ) break; else return 0; } if( negflag ) workint = -workint; *retint = workint; return 1; } // Function copied from musl libc exp10(), to avoid portability issues // Copyright (c) 2005-2020 Rich Felker, et al. // Available under the terms of the MIT license static inline double ccExp10(double x) { static const double p10[] = { 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15 }; double n, y = modf(x, &n); union {double f; uint64_t i;} u = {n}; /* fabs(n) < 16 without raising invalid on nan */ if ((u.i>>52 & 0x7ff) < 0x3ff+4) { if (!y) return p10[(int)n+15]; y = exp2(3.32192809488736234787031942948939 * y); return y * p10[(int)n+15]; } return pow(10.0, x); } static int ccSeqParseDouble( char *seq, int seqlength, double *retdouble ) { int i, negflag; char c; double accum; double decfactor; int64_t exponent; *retdouble = 0.0; i = 0; if( !( seqlength ) ) return 0; negflag = ( seq[i] == '-' ); i += negflag; accum = 0.0; for( ; i < seqlength ; i++ ) { c = seq[i]; if( ( c >= '0' ) && ( c <= '9' ) ) accum = ( accum * 10.0 ) + (double)( c - '0' ); else if( CC_CHAR_IS_DELIMITER( c ) ) goto done; else if( c == 'e' || c == 'E' ) goto sci; else if( c == '.' ) break; else return 0; } i++; decfactor = 0.1; for( ; i < seqlength ; i++ ) { c = seq[i]; if( ( c >= '0' ) && ( c <= '9' ) ) { accum += (double)( c - '0' ) * decfactor; decfactor *= 0.1; } else if( CC_CHAR_IS_DELIMITER( c ) ) goto done; else if( c == 'e' || c == 'E' ) goto sci; else return 0; } done: if( negflag ) accum = -accum; *retdouble = (double)accum; return 1; sci: i++; if( !ccSeqParseInt64( seq + i, seqlength - i, &exponent ) ) return 0; accum *= ccExp10 ( exponent ); goto done; } libplacebo-v4.192.1/src/glsl/000077500000000000000000000000001417677245700157265ustar00rootroot00000000000000libplacebo-v4.192.1/src/glsl/glslang.cc000066400000000000000000000204571417677245700176740ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "config_internal.h" #include #include extern "C" { #include "pl_alloc.h" } #include #include #include #include "glslang.h" #define GLSLANG_VERSION_CHECK(major, minor, patch) \ (((major) < GLSLANG_VERSION_MAJOR) || ((major) == GLSLANG_VERSION_MAJOR && \ (((minor) < GLSLANG_VERSION_MINOR) || ((minor) == GLSLANG_VERSION_MINOR && \ ((patch) <= GLSLANG_VERSION_PATCH))))) using namespace glslang; static pthread_mutex_t pl_glslang_mutex = PTHREAD_MUTEX_INITIALIZER; static int pl_glslang_refcount; bool pl_glslang_init(void) { bool ret = true; pthread_mutex_lock(&pl_glslang_mutex); if (pl_glslang_refcount++ == 0) ret = InitializeProcess(); pthread_mutex_unlock(&pl_glslang_mutex); return ret; } void pl_glslang_uninit(void) { pthread_mutex_lock(&pl_glslang_mutex); if (--pl_glslang_refcount == 0) FinalizeProcess(); pthread_mutex_unlock(&pl_glslang_mutex); } extern const TBuiltInResource DefaultTBuiltInResource; struct pl_glslang_res *pl_glslang_compile(const struct pl_glsl_version *glsl, enum glsl_shader_stage stage, const char *text) { assert(pl_glslang_refcount); struct pl_glslang_res *res = pl_zalloc_ptr(NULL, res); EShLanguage lang; switch (stage) { case GLSL_SHADER_VERTEX: lang = EShLangVertex; break; case GLSL_SHADER_FRAGMENT: lang = EShLangFragment; break; case GLSL_SHADER_COMPUTE: lang = EShLangCompute; break; default: abort(); } TShader *shader = new TShader(lang); struct pl_spirv_version spirv_ver = pl_glsl_spv_version(glsl); shader->setEnvClient(spirv_ver.vulkan ? EShClientVulkan : EShClientOpenGL, (EShTargetClientVersion) spirv_ver.env_version); shader->setEnvTarget(EShTargetSpv, (EShTargetLanguageVersion) spirv_ver.spv_version); shader->setStrings(&text, 1); TBuiltInResource limits = DefaultTBuiltInResource; limits.maxComputeWorkGroupSizeX = glsl->max_group_size[0]; limits.maxComputeWorkGroupSizeY = glsl->max_group_size[1]; limits.maxComputeWorkGroupSizeZ = glsl->max_group_size[2]; limits.minProgramTexelOffset = glsl->min_gather_offset; limits.maxProgramTexelOffset = glsl->max_gather_offset; if (!shader->parse(&limits, 0, true, EShMsgDefault)) { res->error_msg = pl_str0dup0(res, shader->getInfoLog()); delete shader; return res; } TProgram *prog = new TProgram(); prog->addShader(shader); if (!prog->link(EShMsgDefault)) { res->error_msg = pl_str0dup0(res, prog->getInfoLog()); delete shader; delete prog; return res; } std::vector spirv; GlslangToSpv(*prog->getIntermediate(lang), spirv); res->success = true; res->size = spirv.size() * sizeof(unsigned int); res->data = pl_memdup(res, spirv.data(), res->size), delete shader; delete prog; return res; } // Taken from glslang's examples, which apparently generally bases the choices // on OpenGL specification limits const TBuiltInResource DefaultTBuiltInResource = { /* .MaxLights = */ 32, /* .MaxClipPlanes = */ 6, /* .MaxTextureUnits = */ 32, /* .MaxTextureCoords = */ 32, /* .MaxVertexAttribs = */ 64, /* .MaxVertexUniformComponents = */ 4096, /* .MaxVaryingFloats = */ 64, /* .MaxVertexTextureImageUnits = */ 32, /* .MaxCombinedTextureImageUnits = */ 80, /* .MaxTextureImageUnits = */ 32, /* .MaxFragmentUniformComponents = */ 4096, /* .MaxDrawBuffers = */ 32, /* .MaxVertexUniformVectors = */ 128, /* .MaxVaryingVectors = */ 8, /* .MaxFragmentUniformVectors = */ 16, /* .MaxVertexOutputVectors = */ 16, /* .MaxFragmentInputVectors = */ 15, /* .MinProgramTexelOffset = */ -8, /* .MaxProgramTexelOffset = */ 7, /* .MaxClipDistances = 
*/ 8, /* .MaxComputeWorkGroupCountX = */ 65535, /* .MaxComputeWorkGroupCountY = */ 65535, /* .MaxComputeWorkGroupCountZ = */ 65535, /* .MaxComputeWorkGroupSizeX = */ 1024, /* .MaxComputeWorkGroupSizeY = */ 1024, /* .MaxComputeWorkGroupSizeZ = */ 64, /* .MaxComputeUniformComponents = */ 1024, /* .MaxComputeTextureImageUnits = */ 16, /* .MaxComputeImageUniforms = */ 8, /* .MaxComputeAtomicCounters = */ 8, /* .MaxComputeAtomicCounterBuffers = */ 1, /* .MaxVaryingComponents = */ 60, /* .MaxVertexOutputComponents = */ 64, /* .MaxGeometryInputComponents = */ 64, /* .MaxGeometryOutputComponents = */ 128, /* .MaxFragmentInputComponents = */ 128, /* .MaxImageUnits = */ 8, /* .MaxCombinedImageUnitsAndFragmentOutputs = */ 8, /* .MaxCombinedShaderOutputResources = */ 8, /* .MaxImageSamples = */ 0, /* .MaxVertexImageUniforms = */ 0, /* .MaxTessControlImageUniforms = */ 0, /* .MaxTessEvaluationImageUniforms = */ 0, /* .MaxGeometryImageUniforms = */ 0, /* .MaxFragmentImageUniforms = */ 8, /* .MaxCombinedImageUniforms = */ 8, /* .MaxGeometryTextureImageUnits = */ 16, /* .MaxGeometryOutputVertices = */ 256, /* .MaxGeometryTotalOutputComponents = */ 1024, /* .MaxGeometryUniformComponents = */ 1024, /* .MaxGeometryVaryingComponents = */ 64, /* .MaxTessControlInputComponents = */ 128, /* .MaxTessControlOutputComponents = */ 128, /* .MaxTessControlTextureImageUnits = */ 16, /* .MaxTessControlUniformComponents = */ 1024, /* .MaxTessControlTotalOutputComponents = */ 4096, /* .MaxTessEvaluationInputComponents = */ 128, /* .MaxTessEvaluationOutputComponents = */ 128, /* .MaxTessEvaluationTextureImageUnits = */ 16, /* .MaxTessEvaluationUniformComponents = */ 1024, /* .MaxTessPatchComponents = */ 120, /* .MaxPatchVertices = */ 32, /* .MaxTessGenLevel = */ 64, /* .MaxViewports = */ 16, /* .MaxVertexAtomicCounters = */ 0, /* .MaxTessControlAtomicCounters = */ 0, /* .MaxTessEvaluationAtomicCounters = */ 0, /* .MaxGeometryAtomicCounters = */ 0, /* .MaxFragmentAtomicCounters = */ 8, /* .MaxCombinedAtomicCounters = */ 8, /* .MaxAtomicCounterBindings = */ 1, /* .MaxVertexAtomicCounterBuffers = */ 0, /* .MaxTessControlAtomicCounterBuffers = */ 0, /* .MaxTessEvaluationAtomicCounterBuffers = */ 0, /* .MaxGeometryAtomicCounterBuffers = */ 0, /* .MaxFragmentAtomicCounterBuffers = */ 1, /* .MaxCombinedAtomicCounterBuffers = */ 1, /* .MaxAtomicCounterBufferSize = */ 16384, /* .MaxTransformFeedbackBuffers = */ 4, /* .MaxTransformFeedbackInterleavedComponents = */ 64, /* .MaxCullDistances = */ 8, /* .MaxCombinedClipAndCullDistances = */ 8, /* .MaxSamples = */ 4, #if GLSLANG_VERSION_CHECK(0, 0, 2892) /* .maxMeshOutputVerticesNV = */ 256, /* .maxMeshOutputPrimitivesNV = */ 512, /* .maxMeshWorkGroupSizeX_NV = */ 32, /* .maxMeshWorkGroupSizeY_NV = */ 1, /* .maxMeshWorkGroupSizeZ_NV = */ 1, /* .maxTaskWorkGroupSizeX_NV = */ 32, /* .maxTaskWorkGroupSizeY_NV = */ 1, /* .maxTaskWorkGroupSizeZ_NV = */ 1, /* .maxMeshViewCountNV = */ 4, #endif #if GLSLANG_VERSION_CHECK(0, 0, 3763) /* .maxDualSourceDrawBuffersEXT = */ 1, #endif /* .limits = */ { /* .nonInductiveForLoops = */ 1, /* .whileLoops = */ 1, /* .doWhileLoops = */ 1, /* .generalUniformIndexing = */ 1, /* .generalAttributeMatrixVectorIndexing = */ 1, /* .generalVaryingIndexing = */ 1, /* .generalSamplerIndexing = */ 1, /* .generalVariableIndexing = */ 1, /* .generalConstantMatrixVectorIndexing = */ 1, } }; libplacebo-v4.192.1/src/glsl/glslang.h000066400000000000000000000026221417677245700175300ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #ifdef __cplusplus extern "C" { #endif #include "utils.h" bool pl_glslang_init(void); void pl_glslang_uninit(void); struct pl_glslang_res { // Compilation status bool success; const char *error_msg; // Compiled shader memory, or NULL void *data; size_t size; }; // Compile GLSL into a SPIRV stream, if possible. The resulting // pl_glslang_res can simply be freed with pl_free() when done. struct pl_glslang_res *pl_glslang_compile(const struct pl_glsl_version *glsl, enum glsl_shader_stage stage, const char *shader); #ifdef __cplusplus } #endif libplacebo-v4.192.1/src/glsl/spirv.c000066400000000000000000000036511417677245700172420ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "spirv.h" extern const struct spirv_compiler_impl pl_spirv_shaderc; extern const struct spirv_compiler_impl pl_spirv_glslang; static const struct spirv_compiler_impl *compilers[] = { #ifdef PL_HAVE_SHADERC &pl_spirv_shaderc, #endif #ifdef PL_HAVE_GLSLANG &pl_spirv_glslang, #endif }; struct spirv_compiler *spirv_compiler_create(pl_log log) { for (int i = 0; i < PL_ARRAY_SIZE(compilers); i++) { struct spirv_compiler *spirv = compilers[i]->create(log); if (!spirv) continue; pl_info(log, "Initialized SPIR-V compiler '%s'", compilers[i]->name); return spirv; } pl_fatal(log, "Failed initializing any SPIR-V compiler! Maybe libplacebo " "was built without support for either libshaderc or glslang?"); return NULL; } void spirv_compiler_destroy(struct spirv_compiler **spirv) { if (!*spirv) return; (*spirv)->impl->destroy(*spirv); } pl_str spirv_compile_glsl(struct spirv_compiler *spirv, void *alloc, const struct pl_glsl_version *glsl, enum glsl_shader_stage stage, const char *shader) { return spirv->impl->compile(spirv, alloc, glsl, stage, shader); } libplacebo-v4.192.1/src/glsl/spirv.h000066400000000000000000000032301417677245700172400ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include "log.h" #include "utils.h" struct spirv_compiler { const struct spirv_compiler_impl *impl; pl_log log; // For cache invalidation, should uniquely identify everything about this // spirv compiler and its configuration. uint64_t signature; }; // Initialize a SPIR-V compiler instance, or returns NULL on failure. struct spirv_compiler *spirv_compiler_create(pl_log log); void spirv_compiler_destroy(struct spirv_compiler **spirv); // Compile GLSL to SPIR-V. Returns {0} on failure. pl_str spirv_compile_glsl(struct spirv_compiler *spirv, void *alloc, const struct pl_glsl_version *glsl, enum glsl_shader_stage stage, const char *shader); struct spirv_compiler_impl { const char *name; void (*destroy)(struct spirv_compiler *spirv); __typeof__(spirv_compiler_create) *create; __typeof__(spirv_compile_glsl) *compile; }; libplacebo-v4.192.1/src/glsl/spirv_glslang.c000066400000000000000000000047721417677245700207560ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "spirv.h" #include "glsl/glslang.h" const struct spirv_compiler_impl pl_spirv_glslang; static void glslang_destroy(struct spirv_compiler *spirv) { pl_glslang_uninit(); pl_free(spirv); } static struct spirv_compiler *glslang_create(pl_log log) { if (!pl_glslang_init()) { pl_fatal(log, "Failed initializing glslang SPIR-V compiler!"); return NULL; } struct spirv_compiler *spirv = pl_alloc_ptr(NULL, spirv); *spirv = (struct spirv_compiler) { .signature = pl_str0_hash(pl_spirv_glslang.name), .impl = &pl_spirv_glslang, .log = log, }; pl_info(log, "glslang version: %d.%d.%d", GLSLANG_VERSION_MAJOR, GLSLANG_VERSION_MINOR, GLSLANG_VERSION_PATCH); pl_hash_merge(&spirv->signature, (GLSLANG_VERSION_MAJOR & 0xFF) << 24 | (GLSLANG_VERSION_MINOR & 0xFF) << 16 | (GLSLANG_VERSION_PATCH & 0xFFFF)); return spirv; } static pl_str glslang_compile(struct spirv_compiler *spirv, void *alloc, const struct pl_glsl_version *glsl, enum glsl_shader_stage stage, const char *shader) { struct pl_glslang_res *res = pl_glslang_compile(glsl, stage, shader); if (!res || !res->success) { PL_ERR(spirv, "glslang failed: %s", res ? 
res->error_msg : "(null)"); pl_free(res); return (struct pl_str) {0}; } struct pl_str ret = { .buf = pl_steal(alloc, res->data), .len = res->size, }; pl_free(res); return ret; } const struct spirv_compiler_impl pl_spirv_glslang = { .name = "glslang", .destroy = glslang_destroy, .create = glslang_create, .compile = glslang_compile, }; libplacebo-v4.192.1/src/glsl/spirv_shaderc.c000066400000000000000000000121771417677245700207360ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "spirv.h" #include "utils.h" const struct spirv_compiler_impl pl_spirv_shaderc; struct priv { shaderc_compiler_t compiler; }; static void shaderc_destroy(struct spirv_compiler *spirv) { struct priv *p = PL_PRIV(spirv); shaderc_compiler_release(p->compiler); pl_free(spirv); } static struct spirv_compiler *shaderc_create(pl_log log) { struct spirv_compiler *spirv = pl_alloc_obj(NULL, spirv, shaderc_compiler_t); *spirv = (struct spirv_compiler) { .signature = pl_str0_hash(pl_spirv_shaderc.name), .impl = &pl_spirv_shaderc, .log = log, }; struct priv *p = PL_PRIV(spirv); p->compiler = shaderc_compiler_initialize(); if (!p->compiler) goto error; unsigned int ver = 0, rev = 0; shaderc_get_spv_version(&ver, &rev); pl_info(log, "shaderc SPIR-V version %u.%u rev %u", ver >> 16, (ver >> 8) & 0xff, rev); pl_hash_merge(&spirv->signature, (uint64_t) ver << 32 | rev); return spirv; error: shaderc_destroy(spirv); return NULL; } static pl_str shaderc_compile(struct spirv_compiler *spirv, void *alloc, const struct pl_glsl_version *glsl, enum glsl_shader_stage stage, const char *shader) { struct priv *p = PL_PRIV(spirv); shaderc_compile_options_t opts = shaderc_compile_options_initialize(); if (!opts) return (pl_str) {0}; struct pl_spirv_version spirv_ver = pl_glsl_spv_version(glsl); shaderc_compile_options_set_optimization_level(opts, shaderc_optimization_level_performance); shaderc_compile_options_set_target_spirv(opts, spirv_ver.spv_version); shaderc_compile_options_set_target_env(opts, spirv_ver.vulkan ? 
shaderc_target_env_vulkan : shaderc_target_env_opengl, spirv_ver.env_version); for (int i = 0; i < 3; i++) { shaderc_compile_options_set_limit(opts, shaderc_limit_max_compute_work_group_size_x + i, glsl->max_group_size[i]); } shaderc_compile_options_set_limit(opts, shaderc_limit_min_program_texel_offset, glsl->min_gather_offset); shaderc_compile_options_set_limit(opts, shaderc_limit_max_program_texel_offset, glsl->max_gather_offset); static const shaderc_shader_kind kinds[] = { [GLSL_SHADER_VERTEX] = shaderc_glsl_vertex_shader, [GLSL_SHADER_FRAGMENT] = shaderc_glsl_fragment_shader, [GLSL_SHADER_COMPUTE] = shaderc_glsl_compute_shader, }; shaderc_compilation_result_t res; res = shaderc_compile_into_spv(p->compiler, shader, strlen(shader), kinds[stage], "input", "main", opts); int errs = shaderc_result_get_num_errors(res), warn = shaderc_result_get_num_warnings(res); enum pl_log_level lev = errs ? PL_LOG_ERR : warn ? PL_LOG_INFO : PL_LOG_DEBUG; int s = shaderc_result_get_compilation_status(res); bool success = s == shaderc_compilation_status_success; if (!success) lev = PL_LOG_ERR; const char *msg = shaderc_result_get_error_message(res); if (msg[0]) PL_MSG(spirv, lev, "shaderc output:\n%s", msg); static const char *results[] = { [shaderc_compilation_status_success] = "success", [shaderc_compilation_status_invalid_stage] = "invalid stage", [shaderc_compilation_status_compilation_error] = "error", [shaderc_compilation_status_internal_error] = "internal error", [shaderc_compilation_status_null_result_object] = "no result", [shaderc_compilation_status_invalid_assembly] = "invalid assembly", }; const char *status = s < PL_ARRAY_SIZE(results) ? results[s] : "unknown"; PL_MSG(spirv, lev, "shaderc compile status '%s' (%d errors, %d warnings)", status, errs, warn); pl_str ret = {0}; if (success) { void *bytes = (void *) shaderc_result_get_bytes(res); pl_assert(bytes); ret.len = shaderc_result_get_length(res); ret.buf = pl_memdup(alloc, bytes, ret.len); } shaderc_result_release(res); shaderc_compile_options_release(opts); return ret; } const struct spirv_compiler_impl pl_spirv_shaderc = { .name = "shaderc", .destroy = shaderc_destroy, .create = shaderc_create, .compile = shaderc_compile, }; libplacebo-v4.192.1/src/glsl/utils.c000066400000000000000000000024311417677245700172320ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "utils.h" static const struct pl_spirv_version spv_ver_vulkan_1_0 = { .vulkan = true, .env_version = 1 << 22, .spv_version = 1 << 16, }; static const struct pl_spirv_version spv_ver_vulkan_1_1 = { .vulkan = true, .env_version = 1 << 22 | 1 << 12, .spv_version = 1 << 16 | 3 << 8, }; struct pl_spirv_version pl_glsl_spv_version(const struct pl_glsl_version *glsl) { // We don't currently use SPIR-V for OpenGL pl_assert(glsl->vulkan); if (glsl->subgroup_size) return spv_ver_vulkan_1_1; return spv_ver_vulkan_1_0; } libplacebo-v4.192.1/src/glsl/utils.h000066400000000000000000000020721417677245700172400ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include struct pl_spirv_version { bool vulkan; uint32_t env_version; uint32_t spv_version; }; struct pl_spirv_version pl_glsl_spv_version(const struct pl_glsl_version *glsl); enum glsl_shader_stage { GLSL_SHADER_VERTEX = 0, GLSL_SHADER_FRAGMENT, GLSL_SHADER_COMPUTE, }; libplacebo-v4.192.1/src/gpu.c000066400000000000000000002263111417677245700157310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "common.h" #include "log.h" #include "shaders.h" #include "gpu.h" #define require(expr) \ do { \ if (!(expr)) { \ PL_ERR(gpu, "Validation failed: %s (%s:%d)", \ #expr, __FILE__, __LINE__); \ pl_log_stack_trace(gpu->log, PL_LOG_ERR); \ goto error; \ } \ } while (0) void pl_gpu_destroy(pl_gpu gpu) { if (!gpu) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->destroy(gpu); } bool pl_fmt_is_ordered(pl_fmt fmt) { bool ret = !fmt->opaque; for (int i = 0; i < fmt->num_components; i++) ret &= fmt->sample_order[i] == i; return ret; } bool pl_fmt_is_float(pl_fmt fmt) { switch (fmt->type) { case PL_FMT_UNKNOWN: // more likely than not case PL_FMT_FLOAT: case PL_FMT_UNORM: case PL_FMT_SNORM: return true; case PL_FMT_UINT: case PL_FMT_SINT: return false; case PL_FMT_TYPE_COUNT: break; } pl_unreachable(); } bool pl_fmt_has_modifier(pl_fmt fmt, uint64_t modifier) { if (!fmt) return false; for (int i = 0; i < fmt->num_modifiers; i++) { if (fmt->modifiers[i] == modifier) return true; } return false; } static int cmp_fmt(const void *pa, const void *pb) { pl_fmt a = *(pl_fmt *)pa; pl_fmt b = *(pl_fmt *)pb; // Always prefer non-opaque formats if (a->opaque != b->opaque) return PL_CMP(a->opaque, b->opaque); // Always prefer non-emulated formats if (a->emulated != b->emulated) return PL_CMP(a->emulated, b->emulated); int ca = __builtin_popcount(a->caps), cb = __builtin_popcount(b->caps); if (ca != cb) return -PL_CMP(ca, cb); // invert to sort higher values first // If the population count is the same but the caps are different, prefer // the caps with a "lower" value (which tend to be more fundamental caps) if (a->caps != b->caps) return PL_CMP(a->caps, b->caps); // If the capabilities are equal, sort based on the component attributes for (int i = 0; i < PL_ARRAY_SIZE(a->component_depth); i++) { int da = a->component_depth[i], db = b->component_depth[i]; if (da != db) return PL_CMP(da, db); int ha = a->host_bits[i], hb = b->host_bits[i]; if (ha != hb) return PL_CMP(ha, hb); int oa = a->sample_order[i], ob = b->sample_order[i]; if (oa != ob) return PL_CMP(oa, ob); } // Fall back to sorting by the name (for stability) return strcmp(a->name, b->name); } #define FMT_BOOL(letter, cap) ((cap) ? 
(letter) : '-') #define FMT_IDX4(f) (f)[0], (f)[1], (f)[2], (f)[3] static void print_formats(pl_gpu gpu) { if (!pl_msg_test(gpu->log, PL_LOG_DEBUG)) return; #define CAP_HEADER "%-12s" #define CAP_FIELDS "%c%c%c%c%c%c%c%c%c%c%c%c" #define CAP_VALUES \ FMT_BOOL('S', fmt->caps & PL_FMT_CAP_SAMPLEABLE), \ FMT_BOOL('s', fmt->caps & PL_FMT_CAP_STORABLE), \ FMT_BOOL('L', fmt->caps & PL_FMT_CAP_LINEAR), \ FMT_BOOL('R', fmt->caps & PL_FMT_CAP_RENDERABLE), \ FMT_BOOL('b', fmt->caps & PL_FMT_CAP_BLENDABLE), \ FMT_BOOL('B', fmt->caps & PL_FMT_CAP_BLITTABLE), \ FMT_BOOL('V', fmt->caps & PL_FMT_CAP_VERTEX), \ FMT_BOOL('u', fmt->caps & PL_FMT_CAP_TEXEL_UNIFORM), \ FMT_BOOL('t', fmt->caps & PL_FMT_CAP_TEXEL_STORAGE), \ FMT_BOOL('H', fmt->caps & PL_FMT_CAP_HOST_READABLE), \ FMT_BOOL('W', fmt->caps & PL_FMT_CAP_READWRITE), \ FMT_BOOL('G', fmt->gatherable) PL_DEBUG(gpu, "GPU texture formats:"); PL_DEBUG(gpu, " %-10s %-6s %-4s %-4s " CAP_HEADER " %-3s %-13s %-13s %-10s %-10s %-6s", "NAME", "TYPE", "SIZE", "COMP", "CAPS", "EMU", "DEPTH", "HOST_BITS", "GLSL_TYPE", "GLSL_FMT", "FOURCC"); for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; static const char *types[] = { [PL_FMT_UNKNOWN] = "UNKNOWN", [PL_FMT_UNORM] = "UNORM", [PL_FMT_SNORM] = "SNORM", [PL_FMT_UINT] = "UINT", [PL_FMT_SINT] = "SINT", [PL_FMT_FLOAT] = "FLOAT", }; static const char idx_map[4] = {'R', 'G', 'B', 'A'}; char indices[4] = {' ', ' ', ' ', ' '}; if (!fmt->opaque) { for (int i = 0; i < fmt->num_components; i++) indices[i] = idx_map[fmt->sample_order[i]]; } PL_DEBUG(gpu, " %-10s %-6s %-4zu %c%c%c%c " CAP_FIELDS " %-3s " "{%-2d %-2d %-2d %-2d} {%-2d %-2d %-2d %-2d} %-10s %-10s %-6s", fmt->name, types[fmt->type], fmt->texel_size, FMT_IDX4(indices), CAP_VALUES, fmt->emulated ? "y" : "n", FMT_IDX4(fmt->component_depth), FMT_IDX4(fmt->host_bits), PL_DEF(fmt->glsl_type, ""), PL_DEF(fmt->glsl_format, ""), PRINT_FOURCC(fmt->fourcc)); #undef CAP_HEADER #undef CAP_FIELDS #undef CAP_VALUES for (int i = 0; i < fmt->num_modifiers; i++) { PL_TRACE(gpu, " modifiers[%d]: %s", i, PRINT_DRM_MOD(fmt->modifiers[i])); } } } pl_gpu pl_gpu_finalize(struct pl_gpu *gpu) { // Sort formats qsort(gpu->formats, gpu->num_formats, sizeof(pl_fmt), cmp_fmt); // Verification pl_assert(gpu->ctx == gpu->log); pl_assert(gpu->limits.max_tex_2d_dim); pl_assert(gpu->limits.max_variable_comps || gpu->limits.max_ubo_size); for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; pl_assert(fmt->name); pl_assert(fmt->type); pl_assert(fmt->num_components); pl_assert(fmt->internal_size); pl_assert(fmt->opaque ? !fmt->texel_size : fmt->texel_size); pl_assert(!fmt->gatherable || (fmt->caps & PL_FMT_CAP_SAMPLEABLE)); for (int i = 0; i < fmt->num_components; i++) { pl_assert(fmt->component_depth[i]); pl_assert(fmt->opaque ? 
!fmt->host_bits[i] : fmt->host_bits[i]); } enum pl_fmt_caps texel_caps = PL_FMT_CAP_VERTEX | PL_FMT_CAP_TEXEL_UNIFORM | PL_FMT_CAP_TEXEL_STORAGE; if (fmt->caps & texel_caps) { pl_assert(fmt->glsl_type); pl_assert(!fmt->opaque); } pl_assert(!fmt->opaque || !(fmt->caps & PL_FMT_CAP_HOST_READABLE)); pl_assert(!fmt->texel_size == !fmt->texel_align); pl_assert(fmt->texel_size % fmt->texel_align == 0); if (fmt->internal_size != fmt->texel_size && !fmt->opaque) pl_assert(fmt->emulated); // Assert uniqueness of name for (int o = n + 1; o < gpu->num_formats; o++) pl_assert(strcmp(fmt->name, gpu->formats[o]->name) != 0); } // Print info PL_INFO(gpu, "GPU information:"); #define LOG(fmt, field) \ PL_INFO(gpu, " %-26s %" fmt, #field ":", gpu->LOG_STRUCT.field) #define LOG_STRUCT glsl PL_INFO(gpu, " GLSL version: %d%s", gpu->glsl.version, gpu->glsl.vulkan ? " (vulkan)" : gpu->glsl.gles ? " es" : ""); if (gpu->glsl.compute) { LOG("zu", max_shmem_size); LOG(PRIu32, max_group_threads); LOG(PRIu32, max_group_size[0]); LOG(PRIu32, max_group_size[1]); LOG(PRIu32, max_group_size[2]); } LOG(PRIu32, subgroup_size); LOG(PRIi16, min_gather_offset); LOG(PRIi16, max_gather_offset); #undef LOG_STRUCT #define LOG_STRUCT limits PL_INFO(gpu, " Limits:"); // pl_gpu LOG("d", thread_safe); LOG("d", callbacks); // pl_buf LOG("zu", max_buf_size); LOG("zu", max_ubo_size); LOG("zu", max_ssbo_size); LOG("zu", max_vbo_size); LOG("zu", max_mapped_size); LOG(PRIu64, max_buffer_texels); LOG("zu", align_host_ptr); // pl_tex LOG(PRIu32, max_tex_1d_dim); LOG(PRIu32, max_tex_2d_dim); LOG(PRIu32, max_tex_3d_dim); LOG("d", blittable_1d_3d); LOG("d", buf_transfer); LOG("zu", align_tex_xfer_pitch); LOG("zu", align_tex_xfer_offset); // pl_pass LOG("zu", max_variable_comps); LOG("zu", max_constants); LOG("zu", max_pushc_size); LOG("zu", align_vertex_stride); if (gpu->glsl.compute) { LOG(PRIu32, max_dispatch[0]); LOG(PRIu32, max_dispatch[1]); LOG(PRIu32, max_dispatch[2]); } LOG(PRIu32, fragment_queues); LOG(PRIu32, compute_queues); #undef LOG_STRUCT #undef LOG if (pl_gpu_supports_interop(gpu)) { PL_INFO(gpu, " External API interop:"); PL_INFO(gpu, " UUID: %s", PRINT_UUID(gpu->uuid)); PL_INFO(gpu, " PCI: %04x:%02x:%02x:%x", gpu->pci.domain, gpu->pci.bus, gpu->pci.device, gpu->pci.function); PL_INFO(gpu, " buf export caps: 0x%x", (unsigned int) gpu->export_caps.buf); PL_INFO(gpu, " buf import caps: 0x%x", (unsigned int) gpu->import_caps.buf); PL_INFO(gpu, " tex export caps: 0x%x", (unsigned int) gpu->export_caps.tex); PL_INFO(gpu, " tex import caps: 0x%x", (unsigned int) gpu->import_caps.tex); PL_INFO(gpu, " sync export caps: 0x%x", (unsigned int) gpu->export_caps.sync); PL_INFO(gpu, " sync import caps: 0x%x", (unsigned int) gpu->import_caps.sync); } print_formats(gpu); // Set `gpu->caps` for backwards compatibility pl_gpu_caps caps = 0; if (gpu->glsl.compute) caps |= PL_GPU_CAP_COMPUTE; if (gpu->limits.compute_queues > gpu->limits.fragment_queues) caps |= PL_GPU_CAP_PARALLEL_COMPUTE; if (gpu->limits.max_variable_comps) caps |= PL_GPU_CAP_INPUT_VARIABLES; if (gpu->limits.max_mapped_size) caps |= PL_GPU_CAP_MAPPED_BUFFERS; if (gpu->limits.blittable_1d_3d) caps |= PL_GPU_CAP_BLITTABLE_1D_3D; if (gpu->glsl.subgroup_size) caps |= PL_GPU_CAP_SUBGROUPS; if (gpu->limits.callbacks) caps |= PL_GPU_CAP_CALLBACKS; if (gpu->limits.thread_safe) caps |= PL_GPU_CAP_THREAD_SAFE; if (gpu->limits.max_constants) caps |= PL_GPU_CAP_SPEC_CONSTANTS; gpu->caps = caps; // Set the backwards compatibility fields in `limits` gpu->limits.max_shmem_size = 
gpu->glsl.max_shmem_size; gpu->limits.max_group_threads = gpu->glsl.max_group_threads; for (int i = 0; i < 3; i++) gpu->limits.max_group_size[i] = gpu->glsl.max_group_size[i]; gpu->limits.subgroup_size = gpu->glsl.subgroup_size; gpu->limits.min_gather_offset = gpu->glsl.min_gather_offset; gpu->limits.max_gather_offset = gpu->glsl.max_gather_offset; gpu->limits.max_variables = gpu->limits.max_variable_comps; return gpu; } struct glsl_fmt { enum pl_fmt_type type; int num_components; int depth[4]; const char *glsl_format; uint32_t drm_fourcc; }; // List taken from the GLSL specification. (Yes, GLSL supports only exactly // these formats with exactly these names) static const struct glsl_fmt pl_glsl_fmts[] = { {PL_FMT_FLOAT, 1, {16}, "r16f"}, {PL_FMT_FLOAT, 1, {32}, "r32f"}, {PL_FMT_FLOAT, 2, {16, 16}, "rg16f"}, {PL_FMT_FLOAT, 2, {32, 32}, "rg32f"}, {PL_FMT_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"}, {PL_FMT_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"}, {PL_FMT_FLOAT, 3, {11, 11, 10}, "r11f_g11f_b10f"}, {PL_FMT_UNORM, 1, {8}, "r8"}, {PL_FMT_UNORM, 1, {16}, "r16"}, {PL_FMT_UNORM, 2, {8, 8}, "rg8"}, {PL_FMT_UNORM, 2, {16, 16}, "rg16"}, {PL_FMT_UNORM, 4, {8, 8, 8, 8}, "rgba8"}, {PL_FMT_UNORM, 4, {16, 16, 16, 16}, "rgba16"}, {PL_FMT_UNORM, 4, {10, 10, 10, 2}, "rgb10_a2"}, {PL_FMT_SNORM, 1, {8}, "r8_snorm"}, {PL_FMT_SNORM, 1, {16}, "r16_snorm"}, {PL_FMT_SNORM, 2, {8, 8}, "rg8_snorm"}, {PL_FMT_SNORM, 2, {16, 16}, "rg16_snorm"}, {PL_FMT_SNORM, 4, {8, 8, 8, 8}, "rgba8_snorm"}, {PL_FMT_SNORM, 4, {16, 16, 16, 16}, "rgba16_snorm"}, {PL_FMT_UINT, 1, {8}, "r8ui"}, {PL_FMT_UINT, 1, {16}, "r16ui"}, {PL_FMT_UINT, 1, {32}, "r32ui"}, {PL_FMT_UINT, 2, {8, 8}, "rg8ui"}, {PL_FMT_UINT, 2, {16, 16}, "rg16ui"}, {PL_FMT_UINT, 2, {32, 32}, "rg32ui"}, {PL_FMT_UINT, 4, {8, 8, 8, 8}, "rgba8ui"}, {PL_FMT_UINT, 4, {16, 16, 16, 16}, "rgba16ui"}, {PL_FMT_UINT, 4, {32, 32, 32, 32}, "rgba32ui"}, {PL_FMT_UINT, 4, {10, 10, 10, 2}, "rgb10_a2ui"}, {PL_FMT_SINT, 1, {8}, "r8i"}, {PL_FMT_SINT, 1, {16}, "r16i"}, {PL_FMT_SINT, 1, {32}, "r32i"}, {PL_FMT_SINT, 2, {8, 8}, "rg8i"}, {PL_FMT_SINT, 2, {16, 16}, "rg16i"}, {PL_FMT_SINT, 2, {32, 32}, "rg32i"}, {PL_FMT_SINT, 4, {8, 8, 8, 8}, "rgba8i"}, {PL_FMT_SINT, 4, {16, 16, 16, 16}, "rgba16i"}, {PL_FMT_SINT, 4, {32, 32, 32, 32}, "rgba32i"}, }; const char *pl_fmt_glsl_format(pl_fmt fmt, int components) { if (fmt->opaque) return NULL; for (int n = 0; n < PL_ARRAY_SIZE(pl_glsl_fmts); n++) { const struct glsl_fmt *gfmt = &pl_glsl_fmts[n]; if (fmt->type != gfmt->type) continue; if (components != gfmt->num_components) continue; // The component order is irrelevant, so we need to sort the depth // based on the component's index int depth[4] = {0}; for (int i = 0; i < fmt->num_components; i++) depth[fmt->sample_order[i]] = fmt->component_depth[i]; // Copy over any emulated components for (int i = fmt->num_components; i < components; i++) depth[i] = gfmt->depth[i]; for (int i = 0; i < PL_ARRAY_SIZE(depth); i++) { if (depth[i] != gfmt->depth[i]) goto next_fmt; } return gfmt->glsl_format; next_fmt: ; // equivalent to `continue` } return NULL; } #define FOURCC(a,b,c,d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \ ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) struct pl_fmt_fourcc { const char *name; uint32_t fourcc; }; static const struct pl_fmt_fourcc pl_fmt_fourccs[] = { // 8 bpp red {"r8", FOURCC('R','8',' ',' ')}, // 16 bpp red {"r16", FOURCC('R','1','6',' ')}, // 16 bpp rg {"rg8", FOURCC('G','R','8','8')}, {"gr8", FOURCC('R','G','8','8')}, // 32 bpp rg {"rg16", FOURCC('G','R','3','2')}, {"gr16", 
FOURCC('R','G','3','2')}, // 8 bpp rgb: N/A // 16 bpp rgb {"argb4", FOURCC('B','A','1','2')}, {"abgr4", FOURCC('R','A','1','2')}, {"rgba4", FOURCC('A','B','1','2')}, {"bgra4", FOURCC('A','R','1','2')}, {"a1rgb5", FOURCC('B','A','1','5')}, {"a1bgr5", FOURCC('R','A','1','5')}, {"rgb5a1", FOURCC('A','B','1','5')}, {"bgr5a1", FOURCC('A','R','1','5')}, {"rgb565", FOURCC('B','G','1','6')}, {"bgr565", FOURCC('R','G','1','6')}, // 24 bpp rgb {"rgb8", FOURCC('B','G','2','4')}, {"bgr8", FOURCC('R','G','2','4')}, // 32 bpp rgb {"argb8", FOURCC('B','A','2','4')}, {"abgr8", FOURCC('R','A','2','4')}, {"rgba8", FOURCC('A','B','2','4')}, {"bgra8", FOURCC('A','R','2','4')}, {"a2rgb10", FOURCC('B','A','3','0')}, {"a2bgr10", FOURCC('R','A','3','0')}, {"rgb10a2", FOURCC('A','B','3','0')}, {"bgr10a2", FOURCC('A','R','3','0')}, // 64bpp rgb {"rgba16hf", FOURCC('A','B','4','H')}, {"bgra16hf", FOURCC('A','R','4','H')}, // no planar formats yet (tm) }; uint32_t pl_fmt_fourcc(pl_fmt fmt) { if (fmt->opaque) return 0; for (int n = 0; n < PL_ARRAY_SIZE(pl_fmt_fourccs); n++) { const struct pl_fmt_fourcc *fourcc = &pl_fmt_fourccs[n]; if (strcmp(fmt->name, fourcc->name) == 0) return fourcc->fourcc; } return 0; // no matching format } pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components, int min_depth, int host_bits, enum pl_fmt_caps caps) { for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; if (fmt->type != type || fmt->num_components != num_components) continue; if ((fmt->caps & caps) != caps) continue; // When specifying some particular host representation, ensure the // format is non-opaque, ordered and unpadded if (host_bits && fmt->opaque) continue; if (host_bits && fmt->texel_size * 8 != host_bits * num_components) continue; if (host_bits && !pl_fmt_is_ordered(fmt)) continue; for (int i = 0; i < fmt->num_components; i++) { if (fmt->component_depth[i] < min_depth) goto next_fmt; if (host_bits && fmt->host_bits[i] != host_bits) goto next_fmt; } return fmt; next_fmt: ; // equivalent to `continue` } // ran out of formats PL_DEBUG(gpu, "No matching format found"); return NULL; } pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int comps) { static const size_t sizes[] = { [PL_FMT_FLOAT] = sizeof(float), [PL_FMT_UNORM] = sizeof(unsigned), [PL_FMT_UINT] = sizeof(unsigned), [PL_FMT_SNORM] = sizeof(int), [PL_FMT_SINT] = sizeof(int), }; return pl_find_fmt(gpu, type, comps, 0, 8 * sizes[type], PL_FMT_CAP_VERTEX); } pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name) { if (!name) return NULL; for (int i = 0; i < gpu->num_formats; i++) { pl_fmt fmt = gpu->formats[i]; if (strcmp(name, fmt->name) == 0) return fmt; } // ran out of formats return NULL; } pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc) { if (!fourcc) return NULL; for (int i = 0; i < gpu->num_formats; i++) { pl_fmt fmt = gpu->formats[i]; if (fourcc == fmt->fourcc) return fmt; } // ran out of formats return NULL; } static inline bool check_mod(pl_gpu gpu, pl_fmt fmt, uint64_t mod) { for (int i = 0; i < fmt->num_modifiers; i++) { if (fmt->modifiers[i] == mod) return true; } PL_ERR(gpu, "DRM modifier %s not available for format %s. 
Available modifiers:", PRINT_DRM_MOD(mod), fmt->name); for (int i = 0; i < fmt->num_modifiers; i++) PL_ERR(gpu, " %s", PRINT_DRM_MOD(fmt->modifiers[i])); return false; } pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { require(!params->import_handle || !params->export_handle); require(!params->import_handle || !params->initial_data); if (params->export_handle) { require(params->export_handle & gpu->export_caps.tex); require(PL_ISPOT(params->export_handle)); } if (params->import_handle) { require(params->import_handle & gpu->import_caps.tex); require(PL_ISPOT(params->import_handle)); if (params->import_handle == PL_HANDLE_DMA_BUF) { if (!check_mod(gpu, params->format, params->shared_mem.drm_format_mod)) goto error; if (params->shared_mem.stride_w) require(params->w && params->shared_mem.stride_w >= params->w); if (params->shared_mem.stride_h) require(params->h && params->shared_mem.stride_h >= params->h); } } switch (pl_tex_params_dimension(*params)) { case 1: require(params->w > 0); require(params->w <= gpu->limits.max_tex_1d_dim); require(!params->renderable); require(!params->blit_src || gpu->limits.blittable_1d_3d); require(!params->blit_dst || gpu->limits.blittable_1d_3d); break; case 2: require(params->w > 0 && params->h > 0); require(params->w <= gpu->limits.max_tex_2d_dim); require(params->h <= gpu->limits.max_tex_2d_dim); break; case 3: require(params->w > 0 && params->h > 0 && params->d > 0); require(params->w <= gpu->limits.max_tex_3d_dim); require(params->h <= gpu->limits.max_tex_3d_dim); require(params->d <= gpu->limits.max_tex_3d_dim); require(!params->renderable); require(!params->blit_src || gpu->limits.blittable_1d_3d); require(!params->blit_dst || gpu->limits.blittable_1d_3d); break; } pl_fmt fmt = params->format; require(fmt); require(!params->host_readable || fmt->caps & PL_FMT_CAP_HOST_READABLE); require(!params->host_readable || !fmt->opaque); require(!params->host_writable || !fmt->opaque); require(!params->sampleable || fmt->caps & PL_FMT_CAP_SAMPLEABLE); require(!params->renderable || fmt->caps & PL_FMT_CAP_RENDERABLE); require(!params->storable || fmt->caps & PL_FMT_CAP_STORABLE); require(!params->blit_src || fmt->caps & PL_FMT_CAP_BLITTABLE); require(!params->blit_dst || fmt->caps & PL_FMT_CAP_BLITTABLE); const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->tex_create(gpu, params); error: if (params->debug_tag) PL_ERR(gpu, " for texture: %s", params->debug_tag); return NULL; } void pl_tex_destroy(pl_gpu gpu, pl_tex *tex) { if (!*tex) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->tex_destroy(gpu, *tex); *tex = NULL; } static bool pl_tex_params_superset(struct pl_tex_params a, struct pl_tex_params b) { return a.w == b.w && a.h == b.h && a.d == b.d && a.format == b.format && (a.sampleable || !b.sampleable) && (a.renderable || !b.renderable) && (a.storable || !b.storable) && (a.blit_src || !b.blit_src) && (a.blit_dst || !b.blit_dst) && (a.host_writable || !b.host_writable) && (a.host_readable || !b.host_readable); } bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params) { if (params->initial_data) { PL_ERR(gpu, "pl_tex_recreate may not be used with `initial_data`!"); return false; } if (params->import_handle) { PL_ERR(gpu, "pl_tex_recreate may not be used with `import_handle`!"); return false; } if (*tex && pl_tex_params_superset((*tex)->params, *params)) { pl_tex_invalidate(gpu, *tex); return true; } PL_DEBUG(gpu, "(Re)creating %dx%dx%d texture with format %s", params->w, params->h, params->d, 
params->format->name); pl_tex_destroy(gpu, tex); *tex = pl_tex_create(gpu, params); return !!*tex; } void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color) { require(dst->params.blit_dst); const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->tex_invalidate) impl->tex_invalidate(gpu, dst); impl->tex_clear_ex(gpu, dst, color); return; error: if (dst->params.debug_tag) PL_ERR(gpu, " for texture: %s", dst->params.debug_tag); } void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4]) { if (!pl_fmt_is_float(dst->params.format)) { PL_ERR(gpu, "Cannot call `pl_tex_clear` on integer textures, please " "use `pl_tex_clear_ex` instead."); return; } const union pl_clear_color col = { .f = { color[0], color[1], color[2], color[3] }, }; pl_tex_clear_ex(gpu, dst, col); } void pl_tex_invalidate(pl_gpu gpu, pl_tex tex) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->tex_invalidate) impl->tex_invalidate(gpu, tex); } static void strip_coords(pl_tex tex, struct pl_rect3d *rc) { if (!tex->params.d) { rc->z0 = 0; rc->z1 = 1; } if (!tex->params.h) { rc->y0 = 0; rc->y1 = 1; } } static void infer_rc(pl_tex tex, struct pl_rect3d *rc) { if (!rc->x0 && !rc->x1) rc->x1 = tex->params.w; if (!rc->y0 && !rc->y1) rc->y1 = tex->params.h; if (!rc->z0 && !rc->z1) rc->z1 = tex->params.d; } void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) { pl_tex src = params->src, dst = params->dst; require(src && dst); pl_fmt src_fmt = src->params.format; pl_fmt dst_fmt = dst->params.format; require(src_fmt->internal_size == dst_fmt->internal_size); require((src_fmt->type == PL_FMT_UINT) == (dst_fmt->type == PL_FMT_UINT)); require((src_fmt->type == PL_FMT_SINT) == (dst_fmt->type == PL_FMT_SINT)); require(src->params.blit_src); require(dst->params.blit_dst); require(params->sample_mode != PL_TEX_SAMPLE_LINEAR || (src_fmt->caps & PL_FMT_CAP_LINEAR)); struct pl_tex_blit_params fixed = *params; infer_rc(src, &fixed.src_rc); infer_rc(dst, &fixed.dst_rc); strip_coords(src, &fixed.src_rc); strip_coords(dst, &fixed.dst_rc); require(fixed.src_rc.x0 >= 0 && fixed.src_rc.x0 < src->params.w); require(fixed.src_rc.x1 > 0 && fixed.src_rc.x1 <= src->params.w); require(fixed.dst_rc.x0 >= 0 && fixed.dst_rc.x0 < dst->params.w); require(fixed.dst_rc.x1 > 0 && fixed.dst_rc.x1 <= dst->params.w); if (src->params.h) { require(fixed.src_rc.y0 >= 0 && fixed.src_rc.y0 < src->params.h); require(fixed.src_rc.y1 > 0 && fixed.src_rc.y1 <= src->params.h); } if (dst->params.h) { require(fixed.dst_rc.y0 >= 0 && fixed.dst_rc.y0 < dst->params.h); require(fixed.dst_rc.y1 > 0 && fixed.dst_rc.y1 <= dst->params.h); } if (src->params.d) { require(fixed.src_rc.z0 >= 0 && fixed.src_rc.z0 < src->params.d); require(fixed.src_rc.z1 > 0 && fixed.src_rc.z1 <= src->params.d); } if (dst->params.d) { require(fixed.dst_rc.z0 >= 0 && fixed.dst_rc.z0 < dst->params.d); require(fixed.dst_rc.z1 > 0 && fixed.dst_rc.z1 <= dst->params.d); } struct pl_rect3d full = {0, 0, 0, dst->params.w, dst->params.h, dst->params.d}; strip_coords(dst, &full); struct pl_rect3d rcnorm = fixed.dst_rc; pl_rect3d_normalize(&rcnorm); if (pl_rect3d_eq(rcnorm, full)) pl_tex_invalidate(gpu, dst); const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->tex_blit(gpu, &fixed); return; error: if (src->params.debug_tag || dst->params.debug_tag) { PL_ERR(gpu, " for textures: src %s, dst %s", PL_DEF(src->params.debug_tag, "(unknown)"), PL_DEF(dst->params.debug_tag, "(unknown)")); } } size_t pl_tex_transfer_size(const struct pl_tex_transfer_params *par) { int w = 
pl_rect_w(par->rc), h = pl_rect_h(par->rc), d = pl_rect_d(par->rc); size_t pixel_pitch = par->tex->params.format->texel_size; // This generates the absolute bare minimum size of a buffer required to // hold the data of a texture upload/download, by including stride padding // only where strictly necessary. return (d - 1) * par->depth_pitch + (h - 1) * par->row_pitch + w * pixel_pitch; } static bool fix_tex_transfer(pl_gpu gpu, struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; struct pl_rect3d rc = params->rc; // Infer the default values infer_rc(tex, &rc); strip_coords(tex, &rc); if (!params->row_pitch && params->stride_w) params->row_pitch = params->stride_w * fmt->texel_size; if (!params->row_pitch || !tex->params.w) params->row_pitch = pl_rect_w(rc) * fmt->texel_size; if (!params->depth_pitch && params->stride_h) params->depth_pitch = params->stride_h * params->row_pitch; if (!params->depth_pitch || !tex->params.d) params->depth_pitch = pl_rect_h(rc) * params->row_pitch; params->rc = rc; // Check the parameters for sanity switch (pl_tex_params_dimension(tex->params)) { case 3: require(rc.z1 > rc.z0); require(rc.z0 >= 0 && rc.z0 < tex->params.d); require(rc.z1 > 0 && rc.z1 <= tex->params.d); require(params->depth_pitch >= pl_rect_h(rc) * params->row_pitch); require(params->depth_pitch % params->row_pitch == 0); // fall through case 2: require(rc.y1 > rc.y0); require(rc.y0 >= 0 && rc.y0 < tex->params.h); require(rc.y1 > 0 && rc.y1 <= tex->params.h); require(params->row_pitch >= pl_rect_w(rc) * fmt->texel_size); require(params->row_pitch % fmt->texel_align == 0); // fall through case 1: require(rc.x1 > rc.x0); require(rc.x0 >= 0 && rc.x0 < tex->params.w); require(rc.x1 > 0 && rc.x1 <= tex->params.w); break; } require(!params->buf ^ !params->ptr); // exactly one if (params->buf) { pl_buf buf = params->buf; size_t size = pl_tex_transfer_size(params); require(params->buf_offset + size <= buf->params.size); require(gpu->limits.buf_transfer); } require(!params->callback || gpu->limits.callbacks); return true; error: if (tex->params.debug_tag) PL_ERR(gpu, " for texture: %s", tex->params.debug_tag); return false; } bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; require(tex->params.host_writable); struct pl_tex_transfer_params fixed = *params; if (!fix_tex_transfer(gpu, &fixed)) goto error; const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->tex_upload(gpu, &fixed); error: if (tex->params.debug_tag) PL_ERR(gpu, " for texture: %s", tex->params.debug_tag); return false; } bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; require(tex->params.host_readable); struct pl_tex_transfer_params fixed = *params; if (!fix_tex_transfer(gpu, &fixed)) goto error; const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->tex_download(gpu, &fixed); error: if (tex->params.debug_tag) PL_ERR(gpu, " for texture: %s", tex->params.debug_tag); return false; } bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t t) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->tex_poll ? 
impl->tex_poll(gpu, tex, t) : false; } static bool warned_rounding = false; pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { struct pl_buf_params params_rounded; require(!params->import_handle || !params->export_handle); if (params->export_handle) { require(PL_ISPOT(params->export_handle)); require(params->export_handle & gpu->export_caps.buf); } if (params->import_handle) { require(PL_ISPOT(params->import_handle)); require(params->import_handle & gpu->import_caps.buf); const struct pl_shared_mem *shmem = ¶ms->shared_mem; require(shmem->offset + params->size <= shmem->size); require(params->import_handle != PL_HANDLE_DMA_BUF || !shmem->drm_format_mod); // Fix misalignment on host pointer imports if (params->import_handle == PL_HANDLE_HOST_PTR) { uintptr_t page_mask = ~(gpu->limits.align_host_ptr - 1); uintptr_t ptr_base = (uintptr_t) shmem->handle.ptr & page_mask; size_t ptr_offset = (uintptr_t) shmem->handle.ptr - ptr_base; size_t buf_offset = ptr_offset + shmem->offset; size_t ptr_size = PL_ALIGN2(ptr_offset + shmem->size, gpu->limits.align_host_ptr); if (ptr_base != (uintptr_t) shmem->handle.ptr || ptr_size > shmem->size) { if (!warned_rounding) { warned_rounding = true; PL_WARN(gpu, "Imported host pointer is not page-aligned. " "This should normally be fine on most platforms, " "but may cause issues in some rare circumstances."); } PL_TRACE(gpu, "Rounding imported host pointer %p + %zu -> %zu to " "nearest page boundaries: %p + %zu -> %zu", shmem->handle.ptr, shmem->offset, shmem->size, (void *) ptr_base, buf_offset, ptr_size); } params_rounded = *params; params_rounded.shared_mem.handle.ptr = (void *) ptr_base; params_rounded.shared_mem.offset = buf_offset; params_rounded.shared_mem.size = ptr_size; params = ¶ms_rounded; } } require(params->size > 0 && params->size <= gpu->limits.max_buf_size); require(!params->uniform || params->size <= gpu->limits.max_ubo_size); require(!params->storable || params->size <= gpu->limits.max_ssbo_size); require(!params->drawable || params->size <= gpu->limits.max_vbo_size); require(!params->host_mapped || params->size <= gpu->limits.max_mapped_size); if (params->format) { pl_fmt fmt = params->format; require(params->size <= gpu->limits.max_buffer_texels * fmt->texel_size); require(!params->uniform || (fmt->caps & PL_FMT_CAP_TEXEL_UNIFORM)); require(!params->storable || (fmt->caps & PL_FMT_CAP_TEXEL_STORAGE)); } const struct pl_gpu_fns *impl = PL_PRIV(gpu); pl_buf buf = impl->buf_create(gpu, params); if (buf) require(!params->host_mapped || buf->data); return buf; error: if (params->debug_tag) PL_ERR(gpu, " for buffer: %s", params->debug_tag); return NULL; } void pl_buf_destroy(pl_gpu gpu, pl_buf *buf) { if (!*buf) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->buf_destroy(gpu, *buf); *buf = NULL; } static bool pl_buf_params_superset(struct pl_buf_params a, struct pl_buf_params b) { return a.size >= b.size && a.memory_type == b.memory_type && a.format == b.format && (a.host_writable || !b.host_writable) && (a.host_readable || !b.host_readable) && (a.host_mapped || !b.host_mapped) && (a.uniform || !b.uniform) && (a.storable || !b.storable) && (a.drawable || !b.drawable); } bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *params) { if (params->initial_data) { PL_ERR(gpu, "pl_buf_recreate may not be used with `initial_data`!"); return false; } if (*buf && pl_buf_params_superset((*buf)->params, *params)) return true; PL_INFO(gpu, "(Re)creating %zu buffer", params->size); pl_buf_destroy(gpu, buf); 
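    // Illustrative caller-side sketch (not from the original source; the
    // handle `scratch` and size `needed` are hypothetical), showing the
    // intended use of pl_buf_recreate as a lazily grown scratch buffer:
    //
    //     pl_buf scratch = NULL;
    //     bool ok = pl_buf_recreate(gpu, &scratch, pl_buf_params(
    //         .size          = needed,
    //         .host_writable = true,
    //     ));
    //
    // Only when the existing buffer fails the superset test above does this
    // path actually destroy and re-create the underlying buffer.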
*buf = pl_buf_create(gpu, params); return !!*buf; } void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset, const void *data, size_t size) { require(buf->params.host_writable); require(buf_offset + size <= buf->params.size); require(buf_offset == PL_ALIGN2(buf_offset, 4)); const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->buf_write(gpu, buf, buf_offset, data, size); return; error: if (buf->params.debug_tag) PL_ERR(gpu, " for buffer: %s", buf->params.debug_tag); } bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset, void *dest, size_t size) { require(buf->params.host_readable); require(buf_offset + size <= buf->params.size); const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->buf_read(gpu, buf, buf_offset, dest, size); error: if (buf->params.debug_tag) PL_ERR(gpu, " for buffer: %s", buf->params.debug_tag); return false; } void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { require(src_offset + size <= src->params.size); require(dst_offset + size <= dst->params.size); const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->buf_copy(gpu, dst, dst_offset, src, src_offset, size); return; error: if (src->params.debug_tag || dst->params.debug_tag) { PL_ERR(gpu, " for buffers: src %s, dst %s", src->params.debug_tag, dst->params.debug_tag); } } bool pl_buf_export(pl_gpu gpu, pl_buf buf) { require(buf->params.export_handle || buf->params.import_handle); const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->buf_export(gpu, buf); error: if (buf->params.debug_tag) PL_ERR(gpu, " for buffer: %s", buf->params.debug_tag); return false; } bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t t) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->buf_poll ? impl->buf_poll(gpu, buf, t) : false; } size_t pl_var_type_size(enum pl_var_type type) { switch (type) { case PL_VAR_SINT: return sizeof(int); case PL_VAR_UINT: return sizeof(unsigned int); case PL_VAR_FLOAT: return sizeof(float); case PL_VAR_INVALID: // fall through case PL_VAR_TYPE_COUNT: break; } pl_unreachable(); } #define PL_VAR(TYPE, NAME, M, V) \ struct pl_var pl_var_##NAME(const char *name) { \ return (struct pl_var) { \ .name = name, \ .type = PL_VAR_##TYPE, \ .dim_m = M, \ .dim_v = V, \ .dim_a = 1, \ }; \ } PL_VAR(FLOAT, float, 1, 1) PL_VAR(FLOAT, vec2, 1, 2) PL_VAR(FLOAT, vec3, 1, 3) PL_VAR(FLOAT, vec4, 1, 4) PL_VAR(FLOAT, mat2, 2, 2) PL_VAR(FLOAT, mat2x3, 2, 3) PL_VAR(FLOAT, mat2x4, 2, 4) PL_VAR(FLOAT, mat3, 3, 3) PL_VAR(FLOAT, mat3x4, 3, 4) PL_VAR(FLOAT, mat4x2, 4, 2) PL_VAR(FLOAT, mat4x3, 4, 3) PL_VAR(FLOAT, mat4, 4, 4) PL_VAR(SINT, int, 1, 1) PL_VAR(SINT, ivec2, 1, 2) PL_VAR(SINT, ivec3, 1, 3) PL_VAR(SINT, ivec4, 1, 4) PL_VAR(UINT, uint, 1, 1) PL_VAR(UINT, uvec2, 1, 2) PL_VAR(UINT, uvec3, 1, 3) PL_VAR(UINT, uvec4, 1, 4) #undef PL_VAR const struct pl_named_var pl_var_glsl_types[] = { // float vectors { "float", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }}, { "vec2", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }}, { "vec3", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }}, { "vec4", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }}, // float matrices { "mat2", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 2, .dim_a = 1, }}, { "mat2x3", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 3, .dim_a = 1, }}, { "mat2x4", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 4, .dim_a = 1, }}, { "mat3", { .type = PL_VAR_FLOAT, .dim_m = 3, .dim_v = 3, .dim_a = 1, }}, { "mat3x4", { .type = PL_VAR_FLOAT, .dim_m = 3, .dim_v = 4, 
.dim_a = 1, }}, { "mat4x2", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 2, .dim_a = 1, }}, { "mat4x3", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 3, .dim_a = 1, }}, { "mat4", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 4, .dim_a = 1, }}, // integer vectors { "int", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }}, { "ivec2", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }}, { "ivec3", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }}, { "ivec4", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }}, // unsigned integer vectors { "uint", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }}, { "uvec2", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }}, { "uvec3", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }}, { "uvec4", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }}, {0}, }; #define MAX_DIM 4 const char *pl_var_glsl_type_name(struct pl_var var) { static const char *types[PL_VAR_TYPE_COUNT][MAX_DIM+1][MAX_DIM+1] = { // float vectors [PL_VAR_FLOAT][1][1] = "float", [PL_VAR_FLOAT][1][2] = "vec2", [PL_VAR_FLOAT][1][3] = "vec3", [PL_VAR_FLOAT][1][4] = "vec4", // float matrices [PL_VAR_FLOAT][2][2] = "mat2", [PL_VAR_FLOAT][2][3] = "mat2x3", [PL_VAR_FLOAT][2][4] = "mat2x4", [PL_VAR_FLOAT][3][2] = "mat3x2", [PL_VAR_FLOAT][3][3] = "mat3", [PL_VAR_FLOAT][3][4] = "mat3x4", [PL_VAR_FLOAT][4][2] = "mat4x2", [PL_VAR_FLOAT][4][3] = "mat4x3", [PL_VAR_FLOAT][4][4] = "mat4", // integer vectors [PL_VAR_SINT][1][1] = "int", [PL_VAR_SINT][1][2] = "ivec2", [PL_VAR_SINT][1][3] = "ivec3", [PL_VAR_SINT][1][4] = "ivec4", // unsigned integer vectors [PL_VAR_UINT][1][1] = "uint", [PL_VAR_UINT][1][2] = "uvec2", [PL_VAR_UINT][1][3] = "uvec3", [PL_VAR_UINT][1][4] = "uvec4", }; if (var.dim_v > MAX_DIM || var.dim_m > MAX_DIM) return NULL; return types[var.type][var.dim_m][var.dim_v]; } struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name) { static const enum pl_var_type vartypes[] = { [PL_FMT_FLOAT] = PL_VAR_FLOAT, [PL_FMT_UNORM] = PL_VAR_FLOAT, [PL_FMT_SNORM] = PL_VAR_FLOAT, [PL_FMT_UINT] = PL_VAR_UINT, [PL_FMT_SINT] = PL_VAR_SINT, }; pl_assert(fmt->type < PL_ARRAY_SIZE(vartypes)); return (struct pl_var) { .type = vartypes[fmt->type], .name = name, .dim_v = fmt->num_components, .dim_m = 1, .dim_a = 1, }; } struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var) { size_t col_size = pl_var_type_size(var->type) * var->dim_v; return (struct pl_var_layout) { .offset = offset, .stride = col_size, .size = col_size * var->dim_m * var->dim_a, }; } struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var) { size_t el_size = pl_var_type_size(var->type); // std140 packing rules: // 1. The size of generic values is their size in bytes // 2. The size of vectors is the vector length * the base count // 3. Matrices are treated like arrays of column vectors // 4. The size of array rows is that of the element size rounded up to // the nearest multiple of vec4 // 5. 
All values are aligned to a multiple of their size (stride for arrays), // with the exception of vec3 which is aligned like vec4 size_t stride = el_size * var->dim_v; size_t align = stride; if (var->dim_v == 3) align += el_size; if (var->dim_m * var->dim_a > 1) stride = align = PL_ALIGN2(align, sizeof(float[4])); return (struct pl_var_layout) { .offset = PL_ALIGN2(offset, align), .stride = stride, .size = stride * var->dim_m * var->dim_a, }; } struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var) { size_t el_size = pl_var_type_size(var->type); // std430 packing rules: like std140, except arrays/matrices are always // "tightly" packed, even arrays/matrices of vec3s size_t stride = el_size * var->dim_v; size_t align = stride; if (var->dim_v == 3) align += el_size; if (var->dim_m * var->dim_a > 1) stride = align; return (struct pl_var_layout) { .offset = PL_ALIGN2(offset, align), .stride = stride, .size = stride * var->dim_m * var->dim_a, }; } void memcpy_layout(void *dst_p, struct pl_var_layout dst_layout, const void *src_p, struct pl_var_layout src_layout) { uintptr_t src = (uintptr_t) src_p + src_layout.offset; uintptr_t dst = (uintptr_t) dst_p + dst_layout.offset; if (src_layout.stride == dst_layout.stride) { pl_assert(dst_layout.size == src_layout.size); memcpy((void *) dst, (const void *) src, src_layout.size); return; } size_t stride = PL_MIN(src_layout.stride, dst_layout.stride); uintptr_t end = src + src_layout.size; while (src < end) { pl_assert(dst < dst + dst_layout.size); memcpy((void *) dst, (const void *) src, stride); src += src_layout.stride; dst += dst_layout.stride; } } int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); int ret = impl->desc_namespace(gpu, type); pl_assert(ret >= 0 && ret < PL_DESC_TYPE_COUNT); return ret; } const char *pl_desc_access_glsl_name(enum pl_desc_access mode) { switch (mode) { case PL_DESC_ACCESS_READWRITE: return ""; case PL_DESC_ACCESS_READONLY: return "readonly"; case PL_DESC_ACCESS_WRITEONLY: return "writeonly"; case PL_DESC_ACCESS_COUNT: break; } pl_unreachable(); } const struct pl_blend_params pl_alpha_overlay = { .src_rgb = PL_BLEND_SRC_ALPHA, .dst_rgb = PL_BLEND_ONE_MINUS_SRC_ALPHA, .src_alpha = PL_BLEND_ONE, .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA, }; pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { struct pl_pass_params fixed; require(params->glsl_shader); switch(params->type) { case PL_PASS_RASTER: require(params->vertex_shader); require(params->vertex_stride % gpu->limits.align_vertex_stride == 0); for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_vertex_attrib va = params->vertex_attribs[i]; require(va.name); require(va.fmt); require(va.fmt->caps & PL_FMT_CAP_VERTEX); require(va.offset + va.fmt->texel_size <= params->vertex_stride); } if (!params->target_format) { // Compatibility with older API fixed = *params; fixed.target_format = params->target_dummy.params.format; params = &fixed; } require(params->target_format); require(params->target_format->caps & PL_FMT_CAP_RENDERABLE); require(!params->blend_params || params->target_format->caps & PL_FMT_CAP_BLENDABLE); require(!params->blend_params || params->load_target); break; case PL_PASS_COMPUTE: require(gpu->glsl.compute); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } size_t num_var_comps = 0; for (int i = 0; i < params->num_variables; i++) { struct pl_var var = params->variables[i]; num_var_comps += var.dim_v * var.dim_m * var.dim_a; 
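    // Worked example for the std140/std430 rules implemented in
    // pl_std140_layout / pl_std430_layout above (illustrative only): for a
    // hypothetical variable equivalent to `float arr[4]`, i.e.
    // dim_v = dim_m = 1 and dim_a = 4,
    //
    //     pl_std140_layout(0, &var) -> { .offset = 0, .stride = 16, .size = 64 }
    //     pl_std430_layout(0, &var) -> { .offset = 0, .stride =  4, .size = 16 }
    //
    // reflecting that std140 pads array elements up to vec4 boundaries, while
    // std430 packs them tightly.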
require(var.name); require(pl_var_glsl_type_name(var)); } require(num_var_comps <= gpu->limits.max_variable_comps); require(params->num_constants <= gpu->limits.max_constants); for (int i = 0; i < params->num_constants; i++) require(params->constants[i].type); for (int i = 0; i < params->num_descriptors; i++) { struct pl_desc desc = params->descriptors[i]; require(desc.name); // enforce disjoint descriptor bindings for each namespace int namespace = pl_desc_namespace(gpu, desc.type); for (int j = i+1; j < params->num_descriptors; j++) { struct pl_desc other = params->descriptors[j]; require(desc.binding != other.binding || namespace != pl_desc_namespace(gpu, other.type)); } } require(params->push_constants_size <= gpu->limits.max_pushc_size); require(params->push_constants_size == PL_ALIGN2(params->push_constants_size, 4)); const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->pass_create(gpu, params); error: return NULL; } void pl_pass_destroy(pl_gpu gpu, pl_pass *pass) { if (!*pass) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->pass_destroy(gpu, *pass); *pass = NULL; } void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) { pl_pass pass = params->pass; struct pl_pass_run_params new = *params; for (int i = 0; i < pass->params.num_descriptors; i++) { struct pl_desc desc = pass->params.descriptors[i]; struct pl_desc_binding db = params->desc_bindings[i]; require(db.object); switch (desc.type) { case PL_DESC_SAMPLED_TEX: { pl_tex tex = db.object; pl_fmt fmt = tex->params.format; require(tex->params.sampleable); require(db.sample_mode != PL_TEX_SAMPLE_LINEAR || (fmt->caps & PL_FMT_CAP_LINEAR)); break; } case PL_DESC_STORAGE_IMG: { pl_tex tex = db.object; pl_fmt fmt = tex->params.format; require(tex->params.storable); require(desc.access != PL_DESC_ACCESS_READWRITE || (fmt->caps & PL_FMT_CAP_READWRITE)); break; } case PL_DESC_BUF_UNIFORM: { pl_buf buf = db.object; require(buf->params.uniform); break; } case PL_DESC_BUF_STORAGE: { pl_buf buf = db.object; require(buf->params.storable); break; } case PL_DESC_BUF_TEXEL_UNIFORM: { pl_buf buf = db.object; require(buf->params.uniform && buf->params.format); break; } case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = db.object; pl_fmt fmt = buf->params.format; require(buf->params.storable && buf->params.format); require(desc.access != PL_DESC_ACCESS_READWRITE || (fmt->caps & PL_FMT_CAP_READWRITE)); break; } case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } } for (int i = 0; i < params->num_var_updates; i++) { struct pl_var_update vu = params->var_updates[i]; require(vu.index >= 0 && vu.index < pass->params.num_variables); require(vu.data); } require(params->push_constants || !pass->params.push_constants_size); switch (pass->params.type) { case PL_PASS_RASTER: { switch (pass->params.vertex_type) { case PL_PRIM_TRIANGLE_LIST: require(params->vertex_count % 3 == 0); // fall through case PL_PRIM_TRIANGLE_STRIP: require(params->vertex_count >= 3); break; case PL_PRIM_TYPE_COUNT: pl_unreachable(); } require(!params->vertex_data ^ !params->vertex_buf); if (params->vertex_buf) { pl_buf vertex_buf = params->vertex_buf; require(vertex_buf->params.drawable); if (!params->index_data && !params->index_buf) { // Cannot bounds check indexed draws size_t vert_size = params->vertex_count * pass->params.vertex_stride; require(params->buf_offset + vert_size <= vertex_buf->params.size); } } require(!params->index_data || !params->index_buf); if (params->index_buf) { pl_buf index_buf = params->index_buf; 
require(!params->vertex_data); require(index_buf->params.drawable); size_t index_size = pl_index_buf_size(params); require(params->index_offset + index_size <= index_buf->params.size); } pl_tex target = params->target; require(target); require(pl_tex_params_dimension(target->params) == 2); require(target->params.format->signature == pass->params.target_format->signature); require(target->params.renderable); struct pl_rect2d *vp = &new.viewport; struct pl_rect2d *sc = &new.scissors; // Sanitize viewport/scissors if (!vp->x0 && !vp->x1) vp->x1 = target->params.w; if (!vp->y0 && !vp->y1) vp->y1 = target->params.h; if (!sc->x0 && !sc->x1) sc->x1 = target->params.w; if (!sc->y0 && !sc->y1) sc->y1 = target->params.h; // Constrain the scissors to the target dimension (to sanitize the // underlying graphics API calls) sc->x0 = PL_CLAMP(sc->x0, 0, target->params.w); sc->y0 = PL_CLAMP(sc->y0, 0, target->params.h); sc->x1 = PL_CLAMP(sc->x1, 0, target->params.w); sc->y1 = PL_CLAMP(sc->y1, 0, target->params.h); // Scissors wholly outside target -> silently drop pass (also needed // to ensure we don't cause UB by specifying invalid scissors) if (!pl_rect_w(*sc) || !pl_rect_h(*sc)) return; require(pl_rect_w(*vp) > 0); require(pl_rect_h(*vp) > 0); require(pl_rect_w(*sc) > 0); require(pl_rect_h(*sc) > 0); if (!pass->params.load_target) pl_tex_invalidate(gpu, target); break; } case PL_PASS_COMPUTE: for (int i = 0; i < PL_ARRAY_SIZE(params->compute_groups); i++) { require(params->compute_groups[i] >= 0); require(params->compute_groups[i] <= gpu->limits.max_dispatch[i]); } break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->pass_run(gpu, &new); error: return; } void pl_gpu_flush(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->gpu_flush) impl->gpu_flush(gpu); } void pl_gpu_finish(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->gpu_finish(gpu); } bool pl_gpu_is_failed(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (!impl->gpu_is_failed) return false; return impl->gpu_is_failed(gpu); } // GPU-internal helpers bool pl_tex_upload_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params) { if (params->buf) return pl_tex_upload(gpu, params); pl_buf buf = NULL; struct pl_buf_params bufparams = { .size = pl_tex_transfer_size(params), .debug_tag = PL_DEBUG_TAG, }; // If we can import host pointers directly, and the function is being used // asynchronously, then we can use host pointer import to skip a memcpy. In // the synchronous case, we still force a host memcpy to avoid stalling the // host until the GPU memcpy completes. bool can_import = gpu->import_caps.buf & PL_HANDLE_HOST_PTR; if (can_import && params->callback && bufparams.size > 32*1024) { // 32 KiB bufparams.import_handle = PL_HANDLE_HOST_PTR; bufparams.shared_mem = (struct pl_shared_mem) { .handle.ptr = params->ptr, .size = bufparams.size, .offset = 0, }; // Suppress errors for this test because it may fail, in which case we // want to silently fall back. 
pl_log_level_cap(gpu->log, PL_LOG_DEBUG); buf = pl_buf_create(gpu, &bufparams); pl_log_level_cap(gpu->log, PL_LOG_NONE); } if (!buf) { bufparams.import_handle = 0; bufparams.host_writable = true; buf = pl_buf_create(gpu, &bufparams); } if (!buf) return false; if (!bufparams.import_handle) pl_buf_write(gpu, buf, 0, params->ptr, buf->params.size); struct pl_tex_transfer_params newparams = *params; newparams.buf = buf; newparams.ptr = NULL; bool ok = pl_tex_upload(gpu, &newparams); pl_buf_destroy(gpu, &buf); return ok; } struct pbo_cb_ctx { pl_gpu gpu; pl_buf buf; void *ptr; void (*callback)(void *priv); void *priv; }; static void pbo_download_cb(void *priv) { struct pbo_cb_ctx *p = priv; pl_buf_read(p->gpu, p->buf, 0, p->ptr, p->buf->params.size); pl_buf_destroy(p->gpu, &p->buf); // Run the original callback p->callback(p->priv); pl_free(priv); }; bool pl_tex_download_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params) { if (params->buf) return pl_tex_download(gpu, params); pl_buf buf = NULL; struct pl_buf_params bufparams = { .size = pl_tex_transfer_size(params), .debug_tag = PL_DEBUG_TAG, }; // If we can import host pointers directly, we can avoid an extra memcpy // (sometimes). In the cases where it isn't avoidable, the extra memcpy // will happen inside VRAM, which is typically faster anyway. bool can_import = gpu->import_caps.buf & PL_HANDLE_HOST_PTR; if (can_import && bufparams.size > 32*1024) { // 32 KiB bufparams.import_handle = PL_HANDLE_HOST_PTR; bufparams.shared_mem = (struct pl_shared_mem) { .handle.ptr = params->ptr, .size = bufparams.size, .offset = 0, }; // Suppress errors for this test because it may fail, in which case we // want to silently fall back. pl_log_level_cap(gpu->log, PL_LOG_DEBUG); buf = pl_buf_create(gpu, &bufparams); pl_log_level_cap(gpu->log, PL_LOG_NONE); } if (!buf) { // Fallback when host pointer import is not supported bufparams.import_handle = 0; bufparams.host_readable = true; buf = pl_buf_create(gpu, &bufparams); } if (!buf) return false; struct pl_tex_transfer_params newparams = *params; newparams.ptr = NULL; newparams.buf = buf; // If the transfer is asynchronous, propagate our host read asynchronously if (params->callback && !bufparams.import_handle) { newparams.callback = pbo_download_cb; newparams.priv = pl_alloc_struct(NULL, struct pbo_cb_ctx, { .gpu = gpu, .buf = buf, .ptr = params->ptr, .callback = params->callback, .priv = params->priv, }); } if (!pl_tex_download(gpu, &newparams)) { pl_buf_destroy(gpu, &buf); return false; } if (!params->callback) { while (pl_buf_poll(gpu, buf, 10000000)) // 10 ms PL_TRACE(gpu, "pl_tex_download: synchronous/blocking (slow path)"); } bool ok; if (bufparams.import_handle) { // Buffer download completion already means the host pointer contains // the valid data, no more need to copy. 
(Note: this applies even for // asynchronous downloads) ok = true; pl_buf_destroy(gpu, &buf); } else if (!params->callback) { // Synchronous read back to the host pointer ok = pl_buf_read(gpu, buf, 0, params->ptr, bufparams.size); pl_buf_destroy(gpu, &buf); } else { // Nothing left to do here, the rest will be done by pbo_download_cb ok = true; } return ok; } bool pl_tex_upload_texel(pl_gpu gpu, pl_dispatch dp, const struct pl_tex_transfer_params *params) { const int threads = PL_MIN(256, pl_rect_w(params->rc)); pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; require(params->buf); pl_shader sh = pl_dispatch_begin(dp); if (!sh_try_compute(sh, threads, 1, false, 0)) { PL_ERR(gpu, "Failed emulating texture transfer!"); pl_dispatch_abort(dp, &sh); return false; } bool ubo = params->buf->params.uniform; ident_t buf = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->buf, .desc = { .name = "data", .type = ubo ? PL_DESC_BUF_TEXEL_UNIFORM : PL_DESC_BUF_TEXEL_STORAGE, }, }); ident_t img = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->tex, .desc = { .name = "image", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_WRITEONLY, }, }); // If the transfer width is a natural multiple of the thread size, we // can skip the bounds check. Otherwise, make sure we aren't blitting out // of the range since this would read out of bounds. int groups_x = (pl_rect_w(params->rc) + threads - 1) / threads; if (groups_x * threads != pl_rect_w(params->rc)) { GLSL("if (gl_GlobalInvocationID.x >= %d) \n" " return; \n", pl_rect_w(params->rc)); } // fmt->texel_align contains the size of an individual color value assert(fmt->texel_size == fmt->num_components * fmt->texel_align); GLSL("vec4 color = vec4(0.0); \n" "ivec3 pos = ivec3(gl_GlobalInvocationID) + ivec3(%d, %d, %d); \n" "int base = pos.z * %s + pos.y * %s + pos.x * %s; \n", params->rc.x0, params->rc.y0, params->rc.z0, SH_INT(params->depth_pitch / fmt->texel_align), SH_INT(params->row_pitch / fmt->texel_align), SH_INT(fmt->texel_size / fmt->texel_align)); for (int i = 0; i < fmt->num_components; i++) { GLSL("color[%d] = %s(%s, base + %d).r; \n", i, ubo ? 
"texelFetch" : "imageLoad", buf, i); } int dims = pl_tex_params_dimension(tex->params); static const char *coord_types[] = { [1] = "int", [2] = "ivec2", [3] = "ivec3", }; GLSL("imageStore(%s, %s(pos), color);\n", img, coord_types[dims]); return pl_dispatch_compute(dp, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = { groups_x, pl_rect_h(params->rc), pl_rect_d(params->rc), }, )); error: return false; } bool pl_tex_download_texel(pl_gpu gpu, pl_dispatch dp, const struct pl_tex_transfer_params *params) { const int threads = PL_MIN(256, pl_rect_w(params->rc)); pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; require(params->buf); pl_shader sh = pl_dispatch_begin(dp); if (!sh_try_compute(sh, threads, 1, false, 0)) { PL_ERR(gpu, "Failed emulating texture transfer!"); pl_dispatch_abort(dp, &sh); return false; } ident_t buf = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->buf, .desc = { .name = "data", .type = PL_DESC_BUF_TEXEL_STORAGE, }, }); ident_t img = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->tex, .desc = { .name = "image", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_READONLY, }, }); int groups_x = (pl_rect_w(params->rc) + threads - 1) / threads; if (groups_x * threads != pl_rect_w(params->rc)) { GLSL("if (gl_GlobalInvocationID.x >= %d) \n" " return; \n", pl_rect_w(params->rc)); } int dims = pl_tex_params_dimension(tex->params); static const char *coord_types[] = { [1] = "int", [2] = "ivec2", [3] = "ivec3", }; assert(fmt->texel_size == fmt->num_components * fmt->texel_align); GLSL("ivec3 pos = ivec3(gl_GlobalInvocationID) + ivec3(%d, %d, %d); \n" "int base = pos.z * %s + pos.y * %s + pos.x * %s; \n" "vec4 color = imageLoad(%s, %s(pos)); \n", params->rc.x0, params->rc.y0, params->rc.z0, SH_INT(params->depth_pitch / fmt->texel_align), SH_INT(params->row_pitch / fmt->texel_align), SH_INT(fmt->texel_size / fmt->texel_align), img, coord_types[dims]); for (int i = 0; i < fmt->num_components; i++) GLSL("imageStore(%s, base + %d, vec4(color[%d])); \n", buf, i, i); return pl_dispatch_compute(dp, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = { groups_x, pl_rect_h(params->rc), pl_rect_d(params->rc), }, )); error: return false; } bool pl_tex_blit_compute(pl_gpu gpu, pl_dispatch dp, const struct pl_tex_blit_params *params) { if (!params->src->params.storable || !params->dst->params.storable) return false; // Normalize `dst_rc`, moving all flipping to `src_rc` instead. 
struct pl_rect3d src_rc = params->src_rc; struct pl_rect3d dst_rc = params->dst_rc; if (pl_rect_w(dst_rc) < 0) { PL_SWAP(src_rc.x0, src_rc.x1); PL_SWAP(dst_rc.x0, dst_rc.x1); } if (pl_rect_h(dst_rc) < 0) { PL_SWAP(src_rc.y0, src_rc.y1); PL_SWAP(dst_rc.y0, dst_rc.y1); } if (pl_rect_d(dst_rc) < 0) { PL_SWAP(src_rc.z0, src_rc.z1); PL_SWAP(dst_rc.z0, dst_rc.z1); } bool needs_scaling = false; needs_scaling |= pl_rect_w(dst_rc) != abs(pl_rect_w(src_rc)); needs_scaling |= pl_rect_h(dst_rc) != abs(pl_rect_h(src_rc)); needs_scaling |= pl_rect_d(dst_rc) != abs(pl_rect_d(src_rc)); // Manual trilinear interpolation would be too slow to justify bool needs_sampling = needs_scaling && params->sample_mode != PL_TEX_SAMPLE_NEAREST; if (needs_sampling && !params->src->params.sampleable) return false; const int threads = 256; int bw = PL_MIN(32, pl_rect_w(dst_rc)); int bh = PL_MIN(threads / bw, pl_rect_h(dst_rc)); pl_shader sh = pl_dispatch_begin(dp); if (!sh_try_compute(sh, bw, bh, false, 0)) { pl_dispatch_abort(dp, &sh); return false; } // Avoid over-writing into `dst` int groups_x = (pl_rect_w(dst_rc) + bw - 1) / bw; if (groups_x * bw != pl_rect_w(dst_rc)) { GLSL("if (gl_GlobalInvocationID.x >= %d) \n" " return; \n", pl_rect_w(dst_rc)); } int groups_y = (pl_rect_h(dst_rc) + bh - 1) / bh; if (groups_y * bh != pl_rect_h(dst_rc)) { GLSL("if (gl_GlobalInvocationID.y >= %d) \n" " return; \n", pl_rect_h(dst_rc)); } ident_t dst = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->dst, .desc = { .name = "dst", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_WRITEONLY, }, }); static const char *vecs[] = { [1] = "float", [2] = "vec2", [3] = "vec3", [4] = "vec4", }; static const char *ivecs[] = { [1] = "int", [2] = "ivec2", [3] = "ivec3", [4] = "ivec4", }; int src_dims = pl_tex_params_dimension(params->src->params); int dst_dims = pl_tex_params_dimension(params->dst->params); GLSL("const ivec3 pos = ivec3(gl_GlobalInvocationID); \n" "%s dst_pos = %s(pos + ivec3(%d, %d, %d)); \n", ivecs[dst_dims], ivecs[dst_dims], params->dst_rc.x0, params->dst_rc.y0, params->dst_rc.z0); if (needs_sampling || (needs_scaling && params->src->params.sampleable)) { ident_t src = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "src", .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = params->src, .address_mode = PL_TEX_ADDRESS_CLAMP, .sample_mode = params->sample_mode, } }); GLSL("vec3 fpos = (vec3(pos) + vec3(0.5)) / vec3(%d.0, %d.0, %d.0); \n" "%s src_pos = %s(0.5); \n" "src_pos.x = mix(%f, %f, fpos.x); \n", pl_rect_w(dst_rc), pl_rect_h(dst_rc), pl_rect_d(dst_rc), vecs[src_dims], vecs[src_dims], (float) src_rc.x0 / params->src->params.w, (float) src_rc.x1 / params->src->params.w); if (params->src->params.h) { GLSL("src_pos.y = mix(%f, %f, fpos.y); \n", (float) src_rc.y0 / params->src->params.h, (float) src_rc.y1 / params->src->params.h); } if (params->src->params.d) { GLSL("src_pos.z = mix(%f, %f, fpos.z); \n", (float) src_rc.z0 / params->src->params.d, (float) src_rc.z1 / params->src->params.d); } GLSL("imageStore(%s, dst_pos, %s(%s, src_pos)); \n", dst, sh_tex_fn(sh, params->src->params), src); } else { ident_t src = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->src, .desc = { .name = "src", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_READONLY, }, }); if (needs_scaling) { GLSL("ivec3 src_pos = ivec3(round(vec3(%f, %f, %f) * vec3(pos))); \n", fabs((float) pl_rect_w(src_rc) / pl_rect_w(dst_rc)), fabs((float) pl_rect_h(src_rc) / pl_rect_h(dst_rc)), fabs((float) pl_rect_d(src_rc) 
/ pl_rect_d(dst_rc))); } else { GLSL("ivec3 src_pos = pos; \n"); } GLSL("src_pos = ivec3(%d, %d, %d) * src_pos + ivec3(%d, %d, %d); \n" "imageStore(%s, dst_pos, imageLoad(%s, %s(src_pos))); \n", src_rc.x1 < src_rc.x0 ? -1 : 1, src_rc.y1 < src_rc.y0 ? -1 : 1, src_rc.z1 < src_rc.z0 ? -1 : 1, src_rc.x0, src_rc.y0, src_rc.z0, dst, src, ivecs[src_dims]); } return pl_dispatch_compute(dp, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = { groups_x, groups_y, pl_rect_d(dst_rc), }, )); } void pl_tex_blit_raster(pl_gpu gpu, pl_dispatch dp, const struct pl_tex_blit_params *params) { enum pl_fmt_type src_type = params->src->params.format->type; enum pl_fmt_type dst_type = params->dst->params.format->type; // Only for 2D textures pl_assert(params->src->params.h && !params->src->params.d); pl_assert(params->dst->params.h && !params->dst->params.d); // Integer textures are not supported pl_assert(src_type != PL_FMT_UINT && src_type != PL_FMT_SINT); pl_assert(dst_type != PL_FMT_UINT && dst_type != PL_FMT_SINT); struct pl_rect2df src_rc = { .x0 = params->src_rc.x0, .x1 = params->src_rc.x1, .y0 = params->src_rc.y0, .y1 = params->src_rc.y1, }; struct pl_rect2d dst_rc = { .x0 = params->dst_rc.x0, .x1 = params->dst_rc.x1, .y0 = params->dst_rc.y0, .y1 = params->dst_rc.y1, }; pl_shader sh = pl_dispatch_begin(dp); sh->res.output = PL_SHADER_SIG_COLOR; ident_t pos, src = sh_bind(sh, params->src, PL_TEX_ADDRESS_CLAMP, params->sample_mode, "src_tex", &src_rc, &pos, NULL, NULL); GLSL("vec4 color = %s(%s, %s); \n", sh_tex_fn(sh, params->src->params), src, pos); pl_dispatch_finish(dp, pl_dispatch_params( .shader = &sh, .target = params->dst, .rect = dst_rc, )); } void pl_pass_run_vbo(pl_gpu gpu, const struct pl_pass_run_params *params) { if (!params->vertex_data && !params->index_data) return pl_pass_run(gpu, params); struct pl_pass_run_params newparams = *params; pl_buf vert = NULL, index = NULL; if (params->vertex_data) { vert = pl_buf_create(gpu, pl_buf_params( .size = pl_vertex_buf_size(params), .initial_data = params->vertex_data, .drawable = true, )); if (!vert) { PL_ERR(gpu, "Failed allocating vertex buffer!"); return; } newparams.vertex_buf = vert; newparams.vertex_data = NULL; } if (params->index_data) { index = pl_buf_create(gpu, pl_buf_params( .size = pl_index_buf_size(params), .initial_data = params->index_data, .drawable = true, )); if (!index) { PL_ERR(gpu, "Failed allocating index buffer!"); return; } newparams.index_buf = index; newparams.index_data = NULL; } pl_pass_run(gpu, &newparams); pl_buf_destroy(gpu, &vert); pl_buf_destroy(gpu, &index); } struct pl_pass_params pl_pass_params_copy(void *alloc, const struct pl_pass_params *params) { struct pl_pass_params new = *params; new.cached_program = NULL; new.cached_program_len = 0; new.glsl_shader = pl_str0dup0(alloc, new.glsl_shader); new.vertex_shader = pl_str0dup0(alloc, new.vertex_shader); if (new.blend_params) new.blend_params = pl_memdup_ptr(alloc, new.blend_params); #define DUPNAMES(field) \ do { \ size_t _size = new.num_##field * sizeof(new.field[0]); \ new.field = pl_memdup(alloc, new.field, _size); \ for (int j = 0; j < new.num_##field; j++) \ new.field[j].name = pl_str0dup0(alloc, new.field[j].name); \ } while (0) DUPNAMES(variables); DUPNAMES(descriptors); DUPNAMES(vertex_attribs); #undef DUPNAMES new.constant_data = NULL; new.constants = pl_memdup(alloc, new.constants, new.num_constants * sizeof(new.constants[0])); return new; } pl_sync pl_sync_create(pl_gpu gpu, enum pl_handle_type handle_type) { require(handle_type); 
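    // For illustration: a single handle type such as PL_HANDLE_FD is expected
    // here; passing a mask of several handle types would be rejected by the
    // PL_ISPOT (power-of-two) check below.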
require(handle_type & gpu->export_caps.sync); require(PL_ISPOT(handle_type)); const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->sync_create(gpu, handle_type); error: return NULL; } void pl_sync_destroy(pl_gpu gpu, pl_sync *sync) { if (!*sync) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->sync_destroy(gpu, *sync); *sync = NULL; } bool pl_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync) { require(tex->params.import_handle || tex->params.export_handle); const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->tex_export(gpu, tex, sync); error: if (tex->params.debug_tag) PL_ERR(gpu, " for texture: %s", tex->params.debug_tag); return false; } pl_timer pl_timer_create(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (!impl->timer_create) return NULL; return impl->timer_create(gpu); } void pl_timer_destroy(pl_gpu gpu, pl_timer *timer) { if (!*timer) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->timer_destroy(gpu, *timer); *timer = NULL; } uint64_t pl_timer_query(pl_gpu gpu, pl_timer timer) { if (!timer) return 0; const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->timer_query(gpu, timer); } const char *print_uuid(char buf[3 * UUID_SIZE], const uint8_t uuid[UUID_SIZE]) { static const char *hexdigits = "0123456789ABCDEF"; for (int i = 0; i < UUID_SIZE; i++) { uint8_t x = uuid[i]; buf[3 * i + 0] = hexdigits[x >> 4]; buf[3 * i + 1] = hexdigits[x & 0xF]; buf[3 * i + 2] = i == UUID_SIZE - 1 ? '\0' : ':'; } return buf; } const char *print_drm_mod(char buf[DRM_MOD_SIZE], uint64_t mod) { switch (mod) { case DRM_FORMAT_MOD_LINEAR: return "LINEAR"; case DRM_FORMAT_MOD_INVALID: return "INVALID"; } uint8_t vendor = mod >> 56; uint64_t val = mod & ((1ULL << 56) - 1); const char *name = NULL; switch (vendor) { case 0x00: name = "NONE"; break; case 0x01: name = "INTEL"; break; case 0x02: name = "AMD"; break; case 0x03: name = "NVIDIA"; break; case 0x04: name = "SAMSUNG"; break; case 0x08: name = "ARM"; break; } if (name) { snprintf(buf, DRM_MOD_SIZE, "%s 0x%"PRIx64, name, val); } else { snprintf(buf, DRM_MOD_SIZE, "0x%02x 0x%"PRIx64, vendor, val); } return buf; } libplacebo-v4.192.1/src/gpu.h000066400000000000000000000161121417677245700157320ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include "log.h" // To avoid having to include drm_fourcc.h #ifndef DRM_FORMAT_MOD_LINEAR #define DRM_FORMAT_MOD_LINEAR UINT64_C(0x0) #define DRM_FORMAT_MOD_INVALID ((UINT64_C(1) << 56) - 1) #endif // This struct must be the first member of the gpu's priv struct. The `pl_gpu` // helpers will cast the priv struct to this struct! #define GPU_PFN(name) __typeof__(pl_##name) *name struct pl_gpu_fns { // Destructors: These also free the corresponding objects, but they // must not be called on NULL. 
(The NULL checks are done by the pl_*_destroy // wrappers) void (*destroy)(pl_gpu gpu); void (*tex_destroy)(pl_gpu, pl_tex); void (*buf_destroy)(pl_gpu, pl_buf); void (*pass_destroy)(pl_gpu, pl_pass); void (*sync_destroy)(pl_gpu, pl_sync); void (*timer_destroy)(pl_gpu, pl_timer); GPU_PFN(tex_create); GPU_PFN(tex_invalidate); // optional GPU_PFN(tex_clear_ex); // optional if no blittable formats GPU_PFN(tex_blit); // optional if no blittable formats GPU_PFN(tex_upload); GPU_PFN(tex_download); GPU_PFN(tex_poll); // optional: if NULL, textures are always free to use GPU_PFN(buf_create); GPU_PFN(buf_write); GPU_PFN(buf_read); GPU_PFN(buf_copy); GPU_PFN(buf_export); // optional if !gpu->export_caps.buf GPU_PFN(buf_poll); // optional: if NULL, buffers are always free to use GPU_PFN(desc_namespace); GPU_PFN(pass_create); GPU_PFN(pass_run); GPU_PFN(sync_create); // optional if !gpu->export_caps.sync GPU_PFN(tex_export); // optional if !gpu->export_caps.sync GPU_PFN(timer_create); // optional GPU_PFN(timer_query); // optional GPU_PFN(gpu_flush); // optional GPU_PFN(gpu_finish); GPU_PFN(gpu_is_failed); // optional }; #undef GPU_PFN // All resources such as textures and buffers allocated from the GPU must be // destroyed before calling pl_destroy. void pl_gpu_destroy(pl_gpu gpu); // Returns true if the device supports interop. This is considered to be // the case if at least one of `gpu->export/import_caps` is nonzero. static inline bool pl_gpu_supports_interop(pl_gpu gpu) { return gpu->export_caps.tex || gpu->import_caps.tex || gpu->export_caps.buf || gpu->import_caps.buf || gpu->export_caps.sync || gpu->import_caps.sync; } // GPU-internal helpers: these should not be used outside of GPU implementations // This performs several tasks. It sorts the format list, logs GPU metadata, // performs verification and fixes up backwards compatibility fields. This // should be returned as the last step when creating a `pl_gpu`. pl_gpu pl_gpu_finalize(struct pl_gpu *gpu); // Look up the right GLSL image format qualifier from a partially filled-in // pl_fmt, or NULL if the format does not have a legal matching GLSL name. // // `components` may differ from fmt->num_components (for emulated formats) const char *pl_fmt_glsl_format(pl_fmt fmt, int components); // Look up the right fourcc from a partially filled-in pl_fmt, or 0 if the // format does not have a legal matching fourcc format. uint32_t pl_fmt_fourcc(pl_fmt fmt); // Compute the total size (in bytes) of a texture transfer operation size_t pl_tex_transfer_size(const struct pl_tex_transfer_params *par); // Helper that wraps pl_tex_upload/download using texture upload buffers to // ensure that params->buf is always set. bool pl_tex_upload_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params); bool pl_tex_download_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params); // This requires that params.buf has been set and is of type PL_BUF_TEXEL_* bool pl_tex_upload_texel(pl_gpu gpu, pl_dispatch dp, const struct pl_tex_transfer_params *params); bool pl_tex_download_texel(pl_gpu gpu, pl_dispatch dp, const struct pl_tex_transfer_params *params); // Both `src` and `dst must be storable. `src` must also be sampleable, if the // blit requires linear sampling. Returns false if these conditions are unmet. 
bool pl_tex_blit_compute(pl_gpu gpu, pl_dispatch dp, const struct pl_tex_blit_params *params); // Helper to do a 2D blit with stretch and scale using a raster pass void pl_tex_blit_raster(pl_gpu gpu, pl_dispatch dp, const struct pl_tex_blit_params *params); void pl_pass_run_vbo(pl_gpu gpu, const struct pl_pass_run_params *params); // Make a deep-copy of the pass params. Note: cached_program etc. are not // copied, but cleared explicitly. struct pl_pass_params pl_pass_params_copy(void *alloc, const struct pl_pass_params *params); // Helper to compute the size of an index buffer static inline size_t pl_index_buf_size(const struct pl_pass_run_params *params) { switch (params->index_fmt) { case PL_INDEX_UINT16: return params->vertex_count * sizeof(uint16_t); case PL_INDEX_UINT32: return params->vertex_count * sizeof(uint32_t); case PL_INDEX_FORMAT_COUNT: break; } pl_unreachable(); } // Helper to compute the size of a vertex buffer required to fit all indices static inline size_t pl_vertex_buf_size(const struct pl_pass_run_params *params) { if (!params->index_data) return params->vertex_count * params->pass->params.vertex_stride; int num_vertices = 0; const void *idx = params->index_data; switch (params->index_fmt) { case PL_INDEX_UINT16: for (int i = 0; i < params->vertex_count; i++) num_vertices = PL_MAX(num_vertices, ((const uint16_t *) idx)[i]); break; case PL_INDEX_UINT32: for (int i = 0; i < params->vertex_count; i++) num_vertices = PL_MAX(num_vertices, ((const uint32_t *) idx)[i]); break; case PL_INDEX_FORMAT_COUNT: pl_unreachable(); } return (num_vertices + 1) * params->pass->params.vertex_stride; } // Utility function for pretty-printing UUIDs #define UUID_SIZE 16 #define PRINT_UUID(uuid) (print_uuid((char[3 * UUID_SIZE]){0}, (uuid))) const char *print_uuid(char buf[3 * UUID_SIZE], const uint8_t uuid[UUID_SIZE]); // Helper to pretty-print fourcc codes #define PRINT_FOURCC(fcc) \ (!(fcc) ? "" : (char[5]) { \ (fcc) & 0xFF, \ ((fcc) >> 8) & 0xFF, \ ((fcc) >> 16) & 0xFF, \ ((fcc) >> 24) & 0xFF \ }) #define DRM_MOD_SIZE 26 #define PRINT_DRM_MOD(mod) (print_drm_mod((char[DRM_MOD_SIZE]){0}, (mod))) const char *print_drm_mod(char buf[DRM_MOD_SIZE], uint64_t mod); libplacebo-v4.192.1/src/include/000077500000000000000000000000001417677245700164105ustar00rootroot00000000000000libplacebo-v4.192.1/src/include/libplacebo/000077500000000000000000000000001417677245700205045ustar00rootroot00000000000000libplacebo-v4.192.1/src/include/libplacebo/colorspace.h000066400000000000000000000630371417677245700230200ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_COLORSPACE_H_ #define LIBPLACEBO_COLORSPACE_H_ #include #include #include #include PL_API_BEGIN // The underlying color representation (e.g. RGB, XYZ or YCbCr) enum pl_color_system { PL_COLOR_SYSTEM_UNKNOWN = 0, // YCbCr-like color systems: PL_COLOR_SYSTEM_BT_601, // ITU-R Rec. 
BT.601 (SD) PL_COLOR_SYSTEM_BT_709, // ITU-R Rec. BT.709 (HD) PL_COLOR_SYSTEM_SMPTE_240M, // SMPTE-240M PL_COLOR_SYSTEM_BT_2020_NC, // ITU-R Rec. BT.2020 (non-constant luminance) PL_COLOR_SYSTEM_BT_2020_C, // ITU-R Rec. BT.2020 (constant luminance) PL_COLOR_SYSTEM_BT_2100_PQ, // ITU-R Rec. BT.2100 ICtCp PQ variant PL_COLOR_SYSTEM_BT_2100_HLG, // ITU-R Rec. BT.2100 ICtCp HLG variant PL_COLOR_SYSTEM_DOLBYVISION, // Dolby Vision (see pl_dovi_metadata) PL_COLOR_SYSTEM_YCGCO, // YCgCo (derived from RGB) // Other color systems: PL_COLOR_SYSTEM_RGB, // Red, Green and Blue PL_COLOR_SYSTEM_XYZ, // CIE 1931 XYZ, pre-encoded with gamma 2.6 PL_COLOR_SYSTEM_COUNT }; bool pl_color_system_is_ycbcr_like(enum pl_color_system sys); // Returns true for color systems that are linear transformations of the RGB // equivalent, i.e. are simple matrix multiplications. For color systems with // this property, `pl_color_repr_decode` is sufficient for conversion to RGB. bool pl_color_system_is_linear(enum pl_color_system sys); // Guesses the best YCbCr-like colorspace based on a image given resolution. // This only picks conservative values. (In particular, BT.2020 is never // auto-guessed, even for 4K resolution content) enum pl_color_system pl_color_system_guess_ycbcr(int width, int height); // Friendly names for the canonical channel names and order. enum pl_channel { PL_CHANNEL_NONE = -1, PL_CHANNEL_A = 3, // alpha // RGB system PL_CHANNEL_R = 0, PL_CHANNEL_G = 1, PL_CHANNEL_B = 2, // YCbCr-like systems PL_CHANNEL_Y = 0, PL_CHANNEL_CB = 1, PL_CHANNEL_CR = 2, // Aliases for Cb/Cr PL_CHANNEL_U = 1, PL_CHANNEL_V = 2 // There are deliberately no names for the XYZ system to avoid // confusion due to PL_CHANNEL_Y. }; // The numerical range of the representation (where applicable). enum pl_color_levels { PL_COLOR_LEVELS_UNKNOWN = 0, PL_COLOR_LEVELS_LIMITED, // Limited/TV range, e.g. 16-235 PL_COLOR_LEVELS_FULL, // Full/PC range, e.g. 0-255 PL_COLOR_LEVELS_COUNT, // Compatibility aliases PL_COLOR_LEVELS_TV = PL_COLOR_LEVELS_LIMITED, PL_COLOR_LEVELS_PC = PL_COLOR_LEVELS_FULL, }; // The alpha representation mode. enum pl_alpha_mode { PL_ALPHA_UNKNOWN = 0, // or no alpha channel present PL_ALPHA_INDEPENDENT, // alpha channel is separate from the video PL_ALPHA_PREMULTIPLIED, // alpha channel is multiplied into the colors PL_ALPHA_MODE_COUNT, }; // The underlying bit-wise representation of a color sample. For example, // a 10-bit TV-range YCbCr value uploaded to a 16 bit texture would have // sample_depth=16 color_depth=10 bit_shift=0. // // For another example, a 12-bit XYZ full range sample shifted to 16-bits with // the lower 4 bits all set to 0 would have sample_depth=16 color_depth=12 // bit_shift=4. (libavcodec likes outputting this type of `xyz12`) // // To explain the meaning of `sample_depth` further; the consideration factor // here is the fact that GPU sampling will normalized the sampled color to the // range 0.0 - 1.0 in a manner dependent on the number of bits in the texture // format. So if you upload a 10-bit YCbCr value unpadded as 16-bit color // samples, all of the sampled values will be extremely close to 0.0. In such a // case, `pl_color_repr_normalize` would return a high scaling factor, which // would pull the color up to their 16-bit range. 
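// As an illustrative example, the 10-bit-in-16-bit case described above would
// be expressed as:
//
//     struct pl_bit_encoding bits = {
//         .sample_depth = 16,
//         .color_depth  = 10,
//         .bit_shift    = 0,
//     };
//
// for which `pl_color_repr_normalize` should return a scale factor on the
// order of 2^(16-10) = 64, undoing the GPU's 16-bit normalization.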
struct pl_bit_encoding { int sample_depth; // the number of bits the color is stored/sampled as int color_depth; // the effective number of bits of the color information int bit_shift; // a representational bit shift applied to the color }; // Returns whether two bit encodings are exactly identical. bool pl_bit_encoding_equal(const struct pl_bit_encoding *b1, const struct pl_bit_encoding *b2); // Parsed metadata from the Dolby Vision RPU struct pl_dovi_metadata { // Colorspace transformation metadata float nonlinear_offset[3]; // input offset ("ycc_to_rgb_offset") struct pl_matrix3x3 nonlinear; // before PQ, also called "ycc_to_rgb" struct pl_matrix3x3 linear; // after PQ, also called "rgb_to_lms" // Reshape data, grouped by component struct pl_reshape_data { uint8_t num_pivots; float pivots[9]; // normalized to [0.0, 1.0] based on BL bit depth uint8_t method[8]; // 0 = polynomial, 1 = MMR // Note: these must be normalized (divide by coefficient_log2_denom) float poly_coeffs[8][3]; // x^0, x^1, x^2, unused must be 0 uint8_t mmr_order[8]; // 1, 2 or 3 float mmr_constant[8]; float mmr_coeffs[8][3 /* order */][7]; } comp[3]; }; // Struct describing the underlying color system and representation. This // information is needed to convert an encoded color to a normalized RGB triple // in the range 0-1. struct pl_color_repr { enum pl_color_system sys; enum pl_color_levels levels; enum pl_alpha_mode alpha; struct pl_bit_encoding bits; // or {0} if unknown // Metadata for PL_COLOR_SYSTEM_DOLBYVISION. Note that, for the sake of // efficiency, this is treated purely as an opaque reference - functions // like pl_color_repr_equal will merely do a pointer equality test. // // The only functions that actually dereference it in any way are // pl_color_repr_decode, pl_shader_decode_color and pl_render_image(_mix). const struct pl_dovi_metadata *dovi; }; // Some common color representations. It's worth pointing out that all of these // presets leave `alpha` and `bits` as unknown - that is, only the system and // levels are predefined extern const struct pl_color_repr pl_color_repr_unknown; extern const struct pl_color_repr pl_color_repr_rgb; extern const struct pl_color_repr pl_color_repr_sdtv; extern const struct pl_color_repr pl_color_repr_hdtv; // also Blu-ray extern const struct pl_color_repr pl_color_repr_uhdtv; // SDR, NCL system extern const struct pl_color_repr pl_color_repr_jpeg; // Returns whether two colorspace representations are exactly identical. bool pl_color_repr_equal(const struct pl_color_repr *c1, const struct pl_color_repr *c2); // Replaces unknown values in the first struct by those of the second struct. void pl_color_repr_merge(struct pl_color_repr *orig, const struct pl_color_repr *update); // This function normalizes the color representation such that // color_depth=sample_depth and bit_shift=0; and returns the scaling factor // that must be multiplied into the color value to accomplish this, assuming // it has already been sampled by the GPU. If unknown, the color and sample // depth will both be inferred as 8 bits for the purposes of this conversion. float pl_color_repr_normalize(struct pl_color_repr *repr); // Guesses the best color levels based on the specified color levels and // falling back to using the color system instead. YCbCr-like systems are // assumed to be TV range, otherwise this defaults to PC range. 
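// For example, a `pl_color_repr` with `sys = PL_COLOR_SYSTEM_BT_709` and
// `levels = PL_COLOR_LEVELS_UNKNOWN` would be guessed as
// PL_COLOR_LEVELS_LIMITED (TV range).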
enum pl_color_levels pl_color_levels_guess(const struct pl_color_repr *repr); // The colorspace's primaries (gamut) enum pl_color_primaries { PL_COLOR_PRIM_UNKNOWN = 0, // Standard gamut: PL_COLOR_PRIM_BT_601_525, // ITU-R Rec. BT.601 (525-line = NTSC, SMPTE-C) PL_COLOR_PRIM_BT_601_625, // ITU-R Rec. BT.601 (625-line = PAL, SECAM) PL_COLOR_PRIM_BT_709, // ITU-R Rec. BT.709 (HD), also sRGB PL_COLOR_PRIM_BT_470M, // ITU-R Rec. BT.470 M PL_COLOR_PRIM_EBU_3213, // EBU Tech. 3213-E / JEDEC P22 phosphors // Wide gamut: PL_COLOR_PRIM_BT_2020, // ITU-R Rec. BT.2020 (UltraHD) PL_COLOR_PRIM_APPLE, // Apple RGB PL_COLOR_PRIM_ADOBE, // Adobe RGB (1998) PL_COLOR_PRIM_PRO_PHOTO, // ProPhoto RGB (ROMM) PL_COLOR_PRIM_CIE_1931, // CIE 1931 RGB primaries PL_COLOR_PRIM_DCI_P3, // DCI-P3 (Digital Cinema) PL_COLOR_PRIM_DISPLAY_P3, // DCI-P3 (Digital Cinema) with D65 white point PL_COLOR_PRIM_V_GAMUT, // Panasonic V-Gamut (VARICAM) PL_COLOR_PRIM_S_GAMUT, // Sony S-Gamut PL_COLOR_PRIM_FILM_C, // Traditional film primaries with Illuminant C PL_COLOR_PRIM_COUNT }; bool pl_color_primaries_is_wide_gamut(enum pl_color_primaries prim); // Guesses the best primaries based on a resolution. This always guesses // conservatively, i.e. it will never return a wide gamut color space even if // the resolution is 4K. enum pl_color_primaries pl_color_primaries_guess(int width, int height); // The colorspace's transfer function (gamma / EOTF) enum pl_color_transfer { PL_COLOR_TRC_UNKNOWN = 0, // Standard dynamic range: PL_COLOR_TRC_BT_1886, // ITU-R Rec. BT.1886 (CRT emulation + OOTF) PL_COLOR_TRC_SRGB, // IEC 61966-2-4 sRGB (CRT emulation) PL_COLOR_TRC_LINEAR, // Linear light content PL_COLOR_TRC_GAMMA18, // Pure power gamma 1.8 PL_COLOR_TRC_GAMMA20, // Pure power gamma 2.0 PL_COLOR_TRC_GAMMA22, // Pure power gamma 2.2 PL_COLOR_TRC_GAMMA24, // Pure power gamma 2.4 PL_COLOR_TRC_GAMMA26, // Pure power gamma 2.6 PL_COLOR_TRC_GAMMA28, // Pure power gamma 2.8 PL_COLOR_TRC_PRO_PHOTO, // ProPhoto RGB (ROMM) // High dynamic range: PL_COLOR_TRC_PQ, // ITU-R BT.2100 PQ (perceptual quantizer), aka SMPTE ST2048 PL_COLOR_TRC_HLG, // ITU-R BT.2100 HLG (hybrid log-gamma), aka ARIB STD-B67 PL_COLOR_TRC_V_LOG, // Panasonic V-Log (VARICAM) PL_COLOR_TRC_S_LOG1, // Sony S-Log1 PL_COLOR_TRC_S_LOG2, // Sony S-Log2 PL_COLOR_TRC_COUNT }; // Returns the nominal peak of a given transfer function, relative to the // reference white. This refers to the highest encodable signal level. // Always equal to 1.0 for SDR curves. // // Note: This returns the highest encodable signal by definition of the EOTF, // regardless of the ultimate representation (e.g. scene or display referred). // For HLG in particular, this is always around 3.77 - which is potentially // different from the signal peak after applying the OOTF to go from scene // referred to display referred (resulting in a display-referred peak of around // 4.92 for a 1000 cd/m^2 HLG reference display). float pl_color_transfer_nominal_peak(enum pl_color_transfer trc); static inline bool pl_color_transfer_is_hdr(enum pl_color_transfer trc) { return pl_color_transfer_nominal_peak(trc) > 1.0; } // This defines the display-space standard reference white level (in cd/m^2) // that is assumed for SDR content, for use when mapping between HDR and SDR in // display space. See ITU-R Report BT.2408 for more information. #define PL_COLOR_SDR_WHITE 203.0f // Deprecated. For compatibility with older versions of libplacebo. 
#define PL_COLOR_REF_WHITE PL_COLOR_SDR_WHITE #define PL_COLOR_SDR_WHITE_HLG 3.17955 // Represents a single CIE xy coordinate (e.g. CIE Yxy with Y = 1.0) struct pl_cie_xy { float x, y; }; // Recovers (X / Y) from a CIE xy value. static inline float pl_cie_X(struct pl_cie_xy xy) { return xy.x / xy.y; } // Recovers (Z / Y) from a CIE xy value. static inline float pl_cie_Z(struct pl_cie_xy xy) { return (1 - xy.x - xy.y) / xy.y; } static inline float pl_cie_xy_equal(const struct pl_cie_xy *a, const struct pl_cie_xy *b) { return a->x == b->x && a->y == b->y; } // Computes the CIE xy chromaticity coordinates of a CIE D-series illuminant // with the given correlated color temperature. // // `temperature` must be between 2500 K and 25000 K, inclusive. struct pl_cie_xy pl_white_from_temp(float temperature); // Represents the raw physical primaries corresponding to a color space. struct pl_raw_primaries { struct pl_cie_xy red, green, blue, white; }; // Returns whether two raw primaries are exactly identical. bool pl_raw_primaries_equal(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b); // Replaces unknown values in the first struct by those of the second struct. void pl_raw_primaries_merge(struct pl_raw_primaries *orig, const struct pl_raw_primaries *update); // Returns the raw primaries for a given color space. const struct pl_raw_primaries *pl_raw_primaries_get(enum pl_color_primaries prim); // Represents raw HDR metadata as defined by SMPTE 2086 / CTA 861.3, which is // often attached to HDR sources and can be forwarded to HDR-capable displays, // or used to guide the libplacebo built-in tone mapping. struct pl_hdr_metadata { // Mastering display metadata. This is used for tone-mapping. struct pl_raw_primaries prim; // mastering display primaries float min_luma, max_luma; // min/max luminance (in cd/m²) // Content light level. This is ignored by libplacebo itself. float max_cll; // max content light level (in cd/m²) float max_fall; // max frame average light level (in cd/m²) }; extern const struct pl_hdr_metadata pl_hdr_metadata_empty; // equal to {0} extern const struct pl_hdr_metadata pl_hdr_metadata_hdr10; // generic HDR10 display // Returns whether two sets of HDR metadata are exactly identical. bool pl_hdr_metadata_equal(const struct pl_hdr_metadata *a, const struct pl_hdr_metadata *b); // Replaces unknown values in the first struct by those of the second struct. void pl_hdr_metadata_merge(struct pl_hdr_metadata *orig, const struct pl_hdr_metadata *update); // Deprecated. No longer used by libplacebo. enum pl_color_light { PL_COLOR_LIGHT_UNKNOWN = 0, PL_COLOR_LIGHT_DISPLAY, // Display-referred, output as-is PL_COLOR_LIGHT_SCENE_HLG, // Scene-referred, HLG OOTF PL_COLOR_LIGHT_SCENE_709_1886, // Scene-referred, OOTF = BT.709+1886 interaction PL_COLOR_LIGHT_SCENE_1_2, // Scene-referred, OOTF = gamma 1.2 PL_COLOR_LIGHT_COUNT }; PL_DEPRECATED bool pl_color_light_is_scene_referred(enum pl_color_light light); // Rendering intent for colorspace transformations. These constants match the // ICC specification (Table 23) enum pl_rendering_intent { PL_INTENT_PERCEPTUAL = 0, PL_INTENT_RELATIVE_COLORIMETRIC = 1, PL_INTENT_SATURATION = 2, PL_INTENT_ABSOLUTE_COLORIMETRIC = 3 }; // Struct describing a physical color space. This information is needed to // turn a normalized RGB triple into its physical meaning, as well as to convert // between color spaces. struct pl_color_space { enum pl_color_primaries primaries; enum pl_color_transfer transfer; // HDR metadata for this color space. 
Note that this can also be combined // with SDR color transfers, in which case it's assumed that the color // transfer in question is linearly "stretched" relative to these values. struct pl_hdr_metadata hdr; // Deprecated fields enum pl_color_light light PL_DEPRECATED; // ignored float sig_peak PL_DEPRECATED; // replaced by `hdr.max_luma` float sig_avg PL_DEPRECATED; // ignored float sig_floor PL_DEPRECATED; // replaced by `hdr.min_luma` float sig_scale PL_DEPRECATED; // merged into `hdr.max/min_luma` }; #define pl_color_space(...) (&(struct pl_color_space) { __VA_ARGS__ }) // Returns whether or not a color space is considered as effectively HDR. // This is true when the effective signal peak is greater than the SDR // reference white (1.0), taking into account `csp->hdr`. bool pl_color_space_is_hdr(const struct pl_color_space *csp); // Returns whether or not a color space is "black scaled", in which case 0.0 is // the true black point. This is true for SDR signals other than BT.1886, as // well as for HLG. bool pl_color_space_is_black_scaled(const struct pl_color_space *csp); // Replaces unknown values in the first struct by those of the second struct. void pl_color_space_merge(struct pl_color_space *orig, const struct pl_color_space *update); // Returns whether two colorspaces are exactly identical. bool pl_color_space_equal(const struct pl_color_space *c1, const struct pl_color_space *c2); // Go through a color-space and explicitly default all unknown fields to // reasonable values. After this function is called, none of the values will be // PL_COLOR_*_UNKNOWN or 0.0. void pl_color_space_infer(struct pl_color_space *space); // Like `pl_color_space_infer`, but takes default values from the reference // color space (excluding certain special cases like HDR or wide gamut). This // is basically the logic used by `pl_shader_color_map` to decide the output // color space in a conservative way. void pl_color_space_infer_ref(struct pl_color_space *space, const struct pl_color_space *ref); // Some common color spaces. Note: These don't necessarily have all fields // filled, in particular `hdr` is left unset. extern const struct pl_color_space pl_color_space_unknown; extern const struct pl_color_space pl_color_space_srgb; extern const struct pl_color_space pl_color_space_bt709; extern const struct pl_color_space pl_color_space_hdr10; extern const struct pl_color_space pl_color_space_bt2020_hlg; extern const struct pl_color_space pl_color_space_monitor; // typical display // This represents metadata about extra operations to perform during colorspace // conversion, which correspond to artistic adjustments of the color. struct pl_color_adjustment { // Brightness boost. 0.0 = neutral, 1.0 = solid white, -1.0 = solid black float brightness; // Contrast boost. 1.0 = neutral, 0.0 = solid black float contrast; // Saturation gain. 1.0 = neutral, 0.0 = grayscale float saturation; // Hue shift, corresponding to a rotation around the [U, V] subvector, in // radians. Only meaningful for YCbCr-like colorspaces. 0.0 = neutral float hue; // Gamma adjustment. 1.0 = neutral, 0.0 = solid black float gamma; // Color temperature shift. 0.0 = 6500 K, -1.0 = 3000 K, 1.0 = 10000 K float temperature; }; // A struct pre-filled with all-neutral values. extern const struct pl_color_adjustment pl_color_adjustment_neutral; // Represents the chroma placement with respect to the luma samples. This is // only relevant for YCbCr-like colorspaces with chroma subsampling. 
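// As an illustrative aside (editorial example, not part of the API itself),
// the siting values below are typically consumed via
// `pl_chroma_location_offset`, declared further down in this header:
//
//     float off_x, off_y;
//     pl_chroma_location_offset(PL_CHROMA_LEFT, &off_x, &off_y);
//     // For "left" siting this should yield roughly (-0.5, 0.0), i.e. the
//     // chroma samples sit half a luma pixel to the left of the luma grid.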
enum pl_chroma_location { PL_CHROMA_UNKNOWN = 0, PL_CHROMA_LEFT, // MPEG2/4, H.264 PL_CHROMA_CENTER, // MPEG1, JPEG PL_CHROMA_TOP_LEFT, PL_CHROMA_TOP_CENTER, PL_CHROMA_BOTTOM_LEFT, PL_CHROMA_BOTTOM_CENTER, PL_CHROMA_COUNT, }; // Fills *x and *y with the offset in luma pixels corresponding to a given // chroma location. // // Note: PL_CHROMA_UNKNOWN defaults to PL_CHROMA_LEFT void pl_chroma_location_offset(enum pl_chroma_location loc, float *x, float *y); // Returns an RGB->XYZ conversion matrix for a given set of primaries. // Multiplying this into the RGB color transforms it to CIE XYZ, centered // around the color space's white point. struct pl_matrix3x3 pl_get_rgb2xyz_matrix(const struct pl_raw_primaries *prim); // Similar to pl_get_rgb2xyz_matrix, but gives the inverse transformation. struct pl_matrix3x3 pl_get_xyz2rgb_matrix(const struct pl_raw_primaries *prim); // Returns a primary adaptation matrix, which converts from one set of // primaries to another. This is an RGB->RGB transformation. For rendering // intents other than PL_INTENT_ABSOLUTE_COLORIMETRIC, the white point is // adapted using the Bradford matrix. struct pl_matrix3x3 pl_get_color_mapping_matrix(const struct pl_raw_primaries *src, const struct pl_raw_primaries *dst, enum pl_rendering_intent intent); // Return a chromatic adaptation matrix, which converts from one white point to // another, using the Bradford matrix. This is an RGB->RGB transformation. struct pl_matrix3x3 pl_get_adaptation_matrix(struct pl_cie_xy src, struct pl_cie_xy dst); // Returns true if 'b' is entirely contained in 'a'. Useful for figuring out if // colorimetric clipping will occur or not. bool pl_primaries_superset(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b); // Cone types involved in human vision enum pl_cone { PL_CONE_L = 1 << 0, PL_CONE_M = 1 << 1, PL_CONE_S = 1 << 2, // Convenience aliases PL_CONE_NONE = 0, PL_CONE_LM = PL_CONE_L | PL_CONE_M, PL_CONE_MS = PL_CONE_M | PL_CONE_S, PL_CONE_LS = PL_CONE_L | PL_CONE_S, PL_CONE_LMS = PL_CONE_L | PL_CONE_M | PL_CONE_S, }; // Structure describing parameters for simulating color blindness struct pl_cone_params { enum pl_cone cones; // Which cones are *affected* by the vision model float strength; // Coefficient for how strong the defect is // (1.0 = Unaffected, 0.0 = Full blindness) }; #define pl_cone_params(...) (&(struct pl_cone_params) { __VA_ARGS__ }) // Built-in color blindness models extern const struct pl_cone_params pl_vision_normal; // No distortion (92%) extern const struct pl_cone_params pl_vision_protanomaly; // Red deficiency (0.66%) extern const struct pl_cone_params pl_vision_protanopia; // Red absence (0.59%) extern const struct pl_cone_params pl_vision_deuteranomaly; // Green deficiency (2.7%) extern const struct pl_cone_params pl_vision_deuteranopia; // Green absence (0.56%) extern const struct pl_cone_params pl_vision_tritanomaly; // Blue deficiency (0.01%) extern const struct pl_cone_params pl_vision_tritanopia; // Blue absence (0.016%) extern const struct pl_cone_params pl_vision_monochromacy; // Blue cones only (<0.001%) extern const struct pl_cone_params pl_vision_achromatopsia; // Rods only (<0.0001%) // Returns a cone adaptation matrix. Applying this to an RGB color in the given // color space will apply the given cone adaptation coefficients for simulating // a type of color blindness. 
// // For the color blindness models which don't entail complete loss of a cone, // you can partially counteract the effect by using a similar model with the // `strength` set to its inverse. For example, to partially counteract // deuteranomaly, you could generate a cone matrix for PL_CONE_M with the // strength 2.0 (or some other number above 1.0). struct pl_matrix3x3 pl_get_cone_matrix(const struct pl_cone_params *params, const struct pl_raw_primaries *prim); // Returns a color decoding matrix for a given combination of source color // representation and adjustment parameters. This mutates `repr` to reflect the // change. If `params` is NULL, it defaults to &pl_color_adjustment_neutral. // // This function always performs a conversion to RGB. To convert to other // colorspaces (e.g. between YUV systems), obtain a second YUV->RGB matrix // and invert it using `pl_transform3x3_invert`. // // Note: For BT.2020 constant-luminance, this outputs chroma information in the // range [-0.5, 0.5]. Since the CL system conversion is non-linear, further // processing must be done by the caller. The channel order is CrYCb. // // Note: For BT.2100 ICtCp, this outputs in the color space L'M'S'. Further // non-linear processing must be done by the caller. // // Note: For XYZ system, the input/encoding gamma must be pre-applied by the // user, typically this has a value of 2.6. struct pl_transform3x3 pl_color_repr_decode(struct pl_color_repr *repr, const struct pl_color_adjustment *params); // Common struct to describe an ICC profile struct pl_icc_profile { // Points to the in-memory representation of the ICC profile. This is // allowed to be NULL, in which case the `pl_icc_profile` represents "no // profile”. const void *data; size_t len; // If a profile is set, this signature must uniquely identify it, ideally // using a checksum of the profile contents. The user is free to choose the // method of determining this signature, but note the existence of the // `pl_icc_profile_compute_signature` helper. uint64_t signature; }; // This doesn't do a comparison of the actual contents, only of the signature. bool pl_icc_profile_equal(const struct pl_icc_profile *p1, const struct pl_icc_profile *p2); // Sets `signature` to a hash of `profile->data`, if non-NULL. Provided as a // convenience function for the sake of users ingesting arbitrary ICC profiles // from sources where they can't reliably detect profile changes. // // Note: This is based on a very fast hash, and will compute a signature for // even large (10 MB) ICC profiles in, typically, a fraction of a millisecond. void pl_icc_profile_compute_signature(struct pl_icc_profile *profile); PL_API_END #endif // LIBPLACEBO_COLORSPACE_H_ libplacebo-v4.192.1/src/include/libplacebo/common.h000066400000000000000000000176761417677245700221660ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #ifndef LIBPLACEBO_COMMON_H_ #define LIBPLACEBO_COMMON_H_ #include #include PL_API_BEGIN // Some common utility types. These are overloaded to support 2D, 3D and // integer/float variants. struct pl_rect2d { int x0, y0; int x1, y1; }; struct pl_rect3d { int x0, y0, z0; int x1, y1, z1; }; struct pl_rect2df { float x0, y0; float x1, y1; }; struct pl_rect3df { float x0, y0, z0; float x1, y1, z1; }; // These macros will work for any of the above pl_rect variants (with enough // dimensions). Careful: double-evaluation hazard #define pl_rect_w(r) ((r).x1 - (r).x0) #define pl_rect_h(r) ((r).y1 - (r).y0) #define pl_rect_d(r) ((r).z1 - (r).z0) #define pl_rect2d_eq(a, b) \ ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \ (a).y0 == (b).y0 && (a).y1 == (b).y1) #define pl_rect3d_eq(a, b) \ ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \ (a).y0 == (b).y0 && (a).y1 == (b).y1 && \ (a).z0 == (b).z0 && (a).z1 == (b).z1) // "Normalize" a rectangle: This ensures d1 >= d0 for all dimensions. void pl_rect2d_normalize(struct pl_rect2d *rc); void pl_rect3d_normalize(struct pl_rect3d *rc); void pl_rect2df_normalize(struct pl_rect2df *rc); void pl_rect3df_normalize(struct pl_rect3df *rc); // Return the rounded form of a rect. struct pl_rect2d pl_rect2df_round(const struct pl_rect2df *rc); struct pl_rect3d pl_rect3df_round(const struct pl_rect3df *rc); // Represents a row-major matrix, i.e. the following matrix // [ a11 a12 a13 ] // [ a21 a22 a23 ] // [ a31 a32 a33 ] // is represented in C like this: // { { a11, a12, a13 }, // { a21, a22, a23 }, // { a31, a32, a33 } }; struct pl_matrix3x3 { float m[3][3]; }; extern const struct pl_matrix3x3 pl_matrix3x3_identity; // Applies a matrix to a float vector in-place. void pl_matrix3x3_apply(const struct pl_matrix3x3 *mat, float vec[3]); // Applies a matrix to a pl_rect3df void pl_matrix3x3_apply_rc(const struct pl_matrix3x3 *mat, struct pl_rect3df *rc); // Scales a color matrix by a linear factor. void pl_matrix3x3_scale(struct pl_matrix3x3 *mat, float scale); // Inverts a matrix. Only use where precision is not that important. void pl_matrix3x3_invert(struct pl_matrix3x3 *mat); // Composes/multiplies two matrices. Multiples B into A, i.e. // A := A * B void pl_matrix3x3_mul(struct pl_matrix3x3 *a, const struct pl_matrix3x3 *b); // Flipped version of `pl_matrix3x3_mul`. // B := A * B void pl_matrix3x3_rmul(const struct pl_matrix3x3 *a, struct pl_matrix3x3 *b); // Represents an affine transformation, which is basically a 3x3 matrix // together with a column vector to add onto the output. struct pl_transform3x3 { struct pl_matrix3x3 mat; float c[3]; }; extern const struct pl_transform3x3 pl_transform3x3_identity; // Applies a transform to a float vector in-place. void pl_transform3x3_apply(const struct pl_transform3x3 *t, float vec[3]); // Applies a transform to a pl_rect3df void pl_transform3x3_apply_rc(const struct pl_transform3x3 *t, struct pl_rect3df *rc); // Scales the output of a transform by a linear factor. Since an affine // transformation is non-linear, this does not commute. If you want to scale // the *input* of a transform, use pl_matrix3x3_scale on `t.mat`. void pl_transform3x3_scale(struct pl_transform3x3 *t, float scale); // Inverts a transform. Only use where precision is not that important. void pl_transform3x3_invert(struct pl_transform3x3 *t); // 2D analog of the above structs. Since these are featured less prominently, // we omit some of the other helper functions. 
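// A brief editorial sketch of the 3x3 helpers above (not part of the API
// itself): a transform can be composed and then applied to a color in-place.
// All variables here are hypothetical user code:
//
//     struct pl_transform3x3 tr = pl_transform3x3_identity;
//     pl_matrix3x3_scale(&tr.mat, 2.0f);     // scale every channel by 2
//     tr.c[0] = tr.c[1] = tr.c[2] = -1.0f;   // then subtract 1.0
//
//     float rgb[3] = { 0.25f, 0.50f, 0.75f };
//     pl_transform3x3_apply(&tr, rgb);       // rgb is now {-0.5, 0.0, 0.5}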
struct pl_matrix2x2 { float m[2][2]; }; extern const struct pl_matrix2x2 pl_matrix2x2_identity; void pl_matrix2x2_apply(const struct pl_matrix2x2 *mat, float vec[2]); void pl_matrix2x2_apply_rc(const struct pl_matrix2x2 *mat, struct pl_rect2df *rc); struct pl_transform2x2 { struct pl_matrix2x2 mat; float c[2]; }; extern const struct pl_transform2x2 pl_transform2x2_identity; void pl_transform2x2_apply(const struct pl_transform2x2 *t, float vec[2]); void pl_transform2x2_apply_rc(const struct pl_transform2x2 *t, struct pl_rect2df *rc); // Helper functions for dealing with aspect ratios and stretched/scaled rects. // Return the (absolute) aspect ratio (width/height) of a given pl_rect2df. // This will always be a positive number, even if `rc` is flipped. float pl_rect2df_aspect(const struct pl_rect2df *rc); // Set the aspect of a `rc` to a given aspect ratio with an extra 'panscan' // factor choosing the balance between shrinking and growing the `rc` to meet // this aspect ratio. // // Notes: // - If `panscan` is 0.0, this function will only ever shrink the `rc`. // - If `panscan` is 1.0, this function will only ever grow the `rc`. // - If `panscan` is 0.5, this function is area-preserving. void pl_rect2df_aspect_set(struct pl_rect2df *rc, float aspect, float panscan); // Set one rect's aspect to that of another #define pl_rect2df_aspect_copy(rc, src, panscan) \ pl_rect2df_aspect_set((rc), pl_rect2df_aspect(src), (panscan)) // 'Fit' one rect inside another. `rc` will be set to the same size and aspect // ratio as `src`, but with the size limited to fit inside the original `rc`. // Like `pl_rect2df_aspect_set`, `panscan` controls the pan&scan factor. void pl_rect2df_aspect_fit(struct pl_rect2df *rc, const struct pl_rect2df *src, float panscan); // Scale rect in each direction while keeping it centered. void pl_rect2df_stretch(struct pl_rect2df *rc, float stretch_x, float stretch_y); // Offset rect by an arbitrary offset factor. If the corresponding dimension // of a rect is flipped, so too is the applied offset. void pl_rect2df_offset(struct pl_rect2df *rc, float offset_x, float offset_y); // Scale a rect uniformly in both dimensions. #define pl_rect2df_zoom(rc, zoom) pl_rect2df_stretch((rc), (zoom), (zoom)) // Rotation in degrees clockwise typedef int pl_rotation; enum { PL_ROTATION_0 = 0, PL_ROTATION_90 = 1, PL_ROTATION_180 = 2, PL_ROTATION_270 = 3, PL_ROTATION_360 = 4, // equivalent to PL_ROTATION_0 // Note: Values outside the range [0,4) are legal, including negatives. }; // Constrains to the interval [PL_ROTATION_0, PL_ROTATION_360). static inline pl_rotation pl_rotation_normalize(pl_rotation rot) { return (rot % PL_ROTATION_360 + PL_ROTATION_360) % PL_ROTATION_360; } // Rotates the coordinate system of a `pl_rect2d(f)` in a certain direction. // For example, calling this with PL_ROTATION_90 will correspond to rotating // the coordinate system 90° to the right (so the x axis becomes the y axis). // // The resulting rect is re-normalized in the same coordinate system, so this // is not the same as simply applying the rotation matrix generated by // `pl_rotation_matrix` to the rect. void pl_rect2df_rotate(struct pl_rect2df *rc, pl_rotation rot); // Returns the aspect ratio in a rotated frame of reference. static inline float pl_aspect_rotate(float aspect, pl_rotation rot) { return (rot % PL_ROTATION_180) ? 
1.0 / aspect : aspect; } #define pl_rect2df_aspect_set_rot(rc, aspect, rot, panscan) \ pl_rect2df_aspect_set((rc), pl_aspect_rotate((aspect), (rot)), (panscan)) #define pl_rect2df_aspect_copy_rot(rc, src, panscan, rot) \ pl_rect2df_aspect_set_rot((rc), pl_rect2df_aspect(src), (rot), (panscan)) PL_API_END #endif // LIBPLACEBO_COMMON_H_ libplacebo-v4.192.1/src/include/libplacebo/config.h.in000066400000000000000000000040511417677245700225270ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_CONFIG_H_ #define LIBPLACEBO_CONFIG_H_ // Increased any time the library changes in a fundamental/major way. #define PL_MAJOR_VER @majorver@ // Increased any time the API changes. (Note: Does not reset when PL_MAJOR_VER // is increased) #define PL_API_VER @apiver@ // Increased any time a fix is made to a given API version. #define PL_FIX_VER (pl_fix_ver()) // Friendly name (`git describe`) for the overall version of the library #define PL_VERSION (pl_version()) int pl_fix_ver(void); const char *pl_version(void); // Feature tests. These aren't described in further detail, but may be useful // for programmers wanting to programmatically check for feature support // in their compiled libplacebo versions. @extra_defs@ // Extra compiler-specific stuff #if defined(_MSC_VER) #define PL_DEPRECATED #else #define PL_DEPRECATED __attribute__((deprecated)) #endif // C++ compatibility #ifdef __cplusplus # define PL_STRUCT(name) struct name##_t # define PL_API_BEGIN extern "C" { # define PL_API_END } #else # define PL_STRUCT(name) struct name # define PL_API_BEGIN # define PL_API_END // Disable this warning because libplacebo's params macros override fields # pragma GCC diagnostic ignored "-Woverride-init" #endif // Extra helper macros #define PL_TOSTRING_INNER(x) #x #define PL_TOSTRING(x) PL_TOSTRING_INNER(x) #endif // LIBPLACEBO_CONTEXT_H_ libplacebo-v4.192.1/src/include/libplacebo/context.h000066400000000000000000000001031417677245700223330ustar00rootroot00000000000000// Note: This was renamed to `pl_log`. #include libplacebo-v4.192.1/src/include/libplacebo/d3d11.h000066400000000000000000000250001417677245700214660ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #ifndef LIBPLACEBO_D3D11_H_ #define LIBPLACEBO_D3D11_H_ #include #include #include #include #include // Structure representing the actual D3D11 device and associated GPU instance typedef const PL_STRUCT(pl_d3d11) { pl_gpu gpu; // The D3D11 device in use. The user is free to use this for their own // purposes, including taking a reference to the device (with AddRef) and // using it beyond the lifetime of the pl_d3d11 that created it (though if // this is done with debug enabled, it will confuse the leak checker.) ID3D11Device *device; // True if the device is using a software (WARP) adapter bool software; } *pl_d3d11; struct pl_d3d11_params { // The Direct3D 11 device to use. Optional, if NULL then libplacebo will // create its own ID3D11Device using the options below. If set, all the // options below will be ignored. ID3D11Device *device; // --- Adapter selection options // The adapter to use. This overrides adapter_luid. IDXGIAdapter *adapter; // The LUID of the adapter to use. If adapter and adapter_luid are unset, // the default adapter will be used instead. LUID adapter_luid; // Allow a software (WARP) adapter when selecting the adapter automatically. // Note that sometimes the default adapter will be a software adapter. This // is because, on Windows 8 and up, if there are no hardware adapters, // Windows will pretend the WARP adapter is the default hardware adapter. bool allow_software; // Always use a software adapter. This is mainly for testing purposes. bool force_software; // --- Device creation options // Enable the debug layer (D3D11_CREATE_DEVICE_DEBUG) bool debug; // Extra flags to pass to D3D11CreateDevice (D3D11_CREATE_DEVICE_FLAG). // libplacebo should be compatible with any flags passed here. UINT flags; // The minimum and maximum allowable feature levels for the created device. // libplacebo will attempt to create a device with the highest feature level // between min_feature_level and max_feature_level (inclusive.) If there are // no supported feature levels in this range, `pl_d3d11_create` will either // return NULL or fall back to the software adapter, depending on whether // `allow_software` is set. // // Normally there is no reason to set `max_feature_level` other than to test // if a program works at lower feature levels. // // Note that D3D_FEATURE_LEVEL_9_3 and below (known as 10level9) are highly // restrictive. These feature levels are supported on a best-effort basis. // They represent very old DirectX 9 compatible PC and laptop hardware // (2001-2007, GeForce FX, 6, 7, ATI R300-R500, GMA 950-X3000) and some // less-old mobile devices (Surface RT, Surface 2.) Basic video rendering // should work, but the full pl_gpu API will not be available and advanced // shaders will probably fail. The hardware is probably too slow for these // anyway. // // Known restrictions of 10level9 devices include: // D3D_FEATURE_LEVEL_9_3 and below: // - `pl_pass_run_params->index_buf` will not work (but `index_data` will) // - Dimensions of 3D textures must be powers of two // - Shaders cannot use gl_FragCoord // - Shaders cannot use texelFetch // D3D_FEATURE_LEVEL_9_2 and below: // - Fragment shaders have no dynamic flow control and very strict limits // on the number of constants, temporary registers and instructions. // Whether a shader meets the requirements will depend on how it's // compiled and optimized, but it's likely that only simple shaders will // work. 
// D3D_FEATURE_LEVEL_9_1: // - No high-bit-depth formats with PL_FMT_CAP_RENDERABLE or // PL_FMT_CAP_LINEAR // // If these restrictions are undesirable and you don't need to support // ancient hardware, set `min_feature_level` to D3D_FEATURE_LEVEL_10_0. int min_feature_level; // Defaults to D3D_FEATURE_LEVEL_9_1 if unset int max_feature_level; // Defaults to D3D_FEATURE_LEVEL_12_1 if unset // Allow up to N in-flight frames. Similar to swapchain_depth for Vulkan and // OpenGL, though with DXGI this is a device-wide setting that affects all // swapchains (except for waitable swapchains.) See the documentation for // `pl_swapchain_latency` for more information. int max_frame_latency; }; // Default/recommended parameters. Should generally be safe and efficient. #define PL_D3D11_DEFAULTS \ .allow_software = true, #define pl_d3d11_params(...) (&(struct pl_d3d11_params) { PL_D3D11_DEFAULTS __VA_ARGS__ }) extern const struct pl_d3d11_params pl_d3d11_default_params; // Creates a new Direct3D 11 device based on the given parameters, or wraps an // existing device, and initializes a new GPU instance. If params is left as // NULL, it defaults to &pl_d3d11_default_params. If an existing device is // provided in params->device, `pl_d3d11_create` will take a reference to it // that will be released in `pl_d3d11_destroy`. pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params); // Release the D3D11 device. // // Note that all libplacebo objects allocated from this pl_d3d11 object (e.g. // via `d3d11->gpu` or using `pl_d3d11_create_swapchain`) *must* be explicitly // destroyed by the user before calling this. void pl_d3d11_destroy(pl_d3d11 *d3d11); // For a `pl_gpu` backed by `pl_d3d11`, this function can be used to retrieve // the underlying `pl_d3d11`. Returns NULL for any other type of `gpu`. pl_d3d11 pl_d3d11_get(pl_gpu gpu); struct pl_d3d11_swapchain_params { // The Direct3D 11 swapchain to wrap. Optional. If NULL, libplacebo will // create its own swapchain using the options below. If set, all the options // below will be ignored. The provided swapchain must have been created by // the same device used by `gpu` and must not have multisampled backbuffers. IDXGISwapChain *swapchain; // --- Swapchain creation options // Initial framebuffer width and height. If both width and height are set to // 0 and window is non-NULL, the client area of the window is used instead. // For convenience, if either component would be 0, it is set to 1 instead. // This is because Windows can have 0-sized windows, but not 0-sized // swapchains. int width; int height; // The handle of the output window. In Windows 8 and up this is optional // because you can output to a CoreWindow or create a composition swapchain // instead. HWND window; // A pointer to the CoreWindow to output to. If both this and `window` are // NULL, CreateSwapChainForComposition will be used to create the swapchain. IUnknown *core_window; // If set, libplacebo will create a swapchain that uses the legacy bitblt // presentation model (with the DXGI_SWAP_EFFECT_DISCARD swap effect.) This // tends to give worse performance and frame pacing in windowed mode and it // prevents borderless fullscreen optimizations, but it might be necessary // to work around buggy drivers, especially with DXGI 1.2 in the Platform // Update for Windows 7. When unset, libplacebo will try to use the flip // presentation model and only fall back to bitblt if flip is unavailable.
bool blit; // additional swapchain flags // No validation on these flags is being performed, and swapchain creation // may fail if an unsupported combination is requested. UINT flags; }; #define pl_d3d11_swapchain_params(...) (&(struct pl_d3d11_swapchain_params) { __VA_ARGS__ }) // Creates a new Direct3D 11 swapchain, or wraps an existing one. If an existing // swapchain is provided in params->swapchain, `pl_d3d11_create_swapchain` will // take a reference to it that will be released in `pl_swapchain_destroy`. pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11, const struct pl_d3d11_swapchain_params *params); // Takes a `pl_swapchain` created by pl_d3d11_create_swapchain and returns a // reference to the underlying IDXGISwapChain. This increments the refcount, so // call IDXGISwapChain::Release when finished with it. IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw); struct pl_d3d11_wrap_params { // The D3D11 texture to wrap, or a texture array containing the texture to // wrap. Must be a ID3D11Texture1D, ID3D11Texture2D or ID3D11Texture3D // created by the same device used by `gpu`, must have D3D11_USAGE_DEFAULT, // and must not be mipmapped or multisampled. ID3D11Resource *tex; // If tex is a texture array, this is the array member to use as the pl_tex. int array_slice; // If tex is a video resource (eg. DXGI_FORMAT_AYUV, DXGI_FORMAT_NV12, // DXGI_FORMAT_P010, etc.,) it can be wrapped as a pl_tex by specifying the // type and size of the shader view. For planar video formats, the plane // that is wrapped depends on the chosen format. // // If tex is not a video resource, these fields are unnecessary. The correct // format will be determined automatically. If tex is not 2D, these fields // are ignored. // // For a list of supported video formats and their corresponding view // formats and sizes, see: // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#VideoViews DXGI_FORMAT fmt; int w; int h; }; #define pl_d3d11_wrap_params(...) (&(struct pl_d3d11_wrap_params) { __VA_ARGS__ }) // Wraps an external texture into a pl_tex abstraction. `pl_d3d11_wrap` takes a // reference to the texture, which is released when `pl_tex_destroy` is called. // // This function may fail due to incompatible formats, incompatible flags or // other reasons, in which case it will return NULL. pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params); #endif // LIBPLACEBO_D3D11_H_ libplacebo-v4.192.1/src/include/libplacebo/dispatch.h000066400000000000000000000233431417677245700224610ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DISPATCH_H_ #define LIBPLACEBO_DISPATCH_H_ #include #include PL_API_BEGIN // Thread-safety: Safe typedef PL_STRUCT(pl_dispatch) *pl_dispatch; // Creates a new shader dispatch object. 
This object provides a translation // layer between generated shaders (pl_shader) and the ra context such that it // can be used to execute shaders. This dispatch object will also provide // shader caching (for efficient re-use). pl_dispatch pl_dispatch_create(pl_log log, pl_gpu gpu); void pl_dispatch_destroy(pl_dispatch *dp); // Reset/increments the internal counters of the pl_dispatch. This must be // called whenever the user is going to begin with a new frame, in order to // perform garbage collection and advance the state of the internal PRNG. // // Note that shaders generated by `pl_dispatch` are therefore entirely // deterministic, as long as the sequence of calls (and inputs to the shader) // are the same. void pl_dispatch_reset_frame(pl_dispatch dp); // Returns a blank pl_shader object, suitable for recording rendering commands. // For more information, see the header documentation in `shaders/*.h`. pl_shader pl_dispatch_begin(pl_dispatch dp); // Struct passed to `info_callback`. Only valid until that function returns. struct pl_dispatch_info { // The finalized/generated shader for this shader execution, as well // as a signature uniquely identifying it. const struct pl_shader_res *shader; uint64_t signature; // A list of execution times for this pass, in nanoseconds. May be empty. uint64_t samples[256]; int num_samples; // As a convenience, this contains the last, average and peak of the above // list of samples. If `num_samples` is 0, these values are also 0. uint64_t last; uint64_t peak; uint64_t average; }; // Set up a dispatch callback for this `pl_dispatch` object. The given callback // will be run for every successfully dispatched shader. Call this again with // `cb == NULL` to disable. void pl_dispatch_callback(pl_dispatch dp, void *priv, void (*cb)(void *priv, const struct pl_dispatch_info *)); struct pl_dispatch_params { // The shader to execute. The pl_dispatch will take over ownership // of this shader, and return it back to the internal pool. // // This shader must have a compatible signature, i.e. inputs // `PL_SHADER_SIG_NONE` and outputs `PL_SHADER_SIG_COLOR`. pl_shader *shader; // The texture to render to. This must have params compatible with the // shader, i.e. `target->params.renderable` for fragment shaders and // `target->params.storable` for compute shaders. // // Note: Even when not using compute shaders, users are advised to always // set `target->params.storable` if permitted by the `pl_fmt`, since this // allows the use of compute shaders instead of full-screen quads, which is // faster on some platforms. pl_tex target; // The target rect to render to. Optional, if left as {0}, then the // entire texture will be rendered to. struct pl_rect2d rect; // If set, enables and controls the blending for this pass. Optional. When // using this with fragment shaders, `target->params.fmt->caps` must // include `PL_FMT_CAP_BLENDABLE`. const struct pl_blend_params *blend_params; // If set, records the execution time of this dispatch into the given // timer object. Optional. // // Note: If this is set, `pl_dispatch` cannot internally measure the // execution time of the shader, which means `pl_dispatch_info.samples` may // be empty as a result. pl_timer timer; }; #define pl_dispatch_params(...) (&(struct pl_dispatch_params) { __VA_ARGS__ }) // Dispatch a generated shader (via the pl_shader mechanism). Returns whether // or not the dispatch was successful. 
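// For illustration only (editorial example, not part of the upstream header),
// a typical fragment-shader dispatch looks roughly like the following. `src`
// and `dst` are assumed to be suitable pl_tex handles created elsewhere, and
// pl_shader_sample_direct() comes from <libplacebo/shaders/sampling.h>:
//
//     pl_shader sh = pl_dispatch_begin(dp);
//     pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src });
//     bool ok = pl_dispatch_finish(dp, pl_dispatch_params(
//         .shader = &sh,
//         .target = dst,
//     ));
//
// Ownership of `sh` passes back to the dispatch either way, so the caller
// must not reuse the shader afterwards.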
bool pl_dispatch_finish(pl_dispatch dp, const struct pl_dispatch_params *params); struct pl_dispatch_compute_params { // The shader to execute. This must be a compute shader with the input // set to PL_SHADER_SIG_NONE. The output, if it has any, is ignored. pl_shader *shader; // The number of work groups to dispatch in each dimension. If this is left // as [0} and `width/height` are both set, the number of work groups will // be inferred from the shader's `compute_group_sizes`. int dispatch_size[3]; // If set, simulate vertex attributes (similar to `pl_dispatch_finish`) // according to the given dimensions. The first two components of the // thread's global ID will be interpreted as the X and Y locations. // // Optional, ignored if either component is left as 0. int width, height; // If set, records the execution time of this dispatch into the given // timer object. Optional. // // Note: If this is set, `pl_dispatch` cannot internally measure the // execution time of the shader, which means `pl_dispatch_info.samples` may // be empty as a result. pl_timer timer; }; #define pl_dispatch_compute_params(...) (&(struct pl_dispatch_compute_params) { __VA_ARGS__ }) // A variant of `pl_dispatch_finish`, this one only dispatches a compute shader // while ignoring its output (if it has one). It's only useful for shaders // which have otherwise observable side effects (such as updating state // objects). bool pl_dispatch_compute(pl_dispatch dp, const struct pl_dispatch_compute_params *params); enum pl_vertex_coords { PL_COORDS_ABSOLUTE, // Absolute/integer `target` coordinates PL_COORDS_RELATIVE, // Relative `target` coordinates in range [0, 1] PL_COORDS_NORMALIZED, // GL-normalized coordinates in range [-1, 1] }; struct pl_dispatch_vertex_params { // The shader to execute. This must be a raster shader with the input set // to `PL_SHADER_SIG_NONE` and the output set to `PL_SHADER_SIG_COLOR`. // // Additionally, the shader must not have any attached vertex attributes. pl_shader *shader; // The texture to render to. Requires `target->params.renderable`. pl_tex target; // The target rect to clip the rendering to. (Optional) struct pl_rect2d scissors; // If set, enables and controls the blending for this pass. Optional. When // enabled, `target->params.fmt->caps` must include `PL_FMT_CAP_BLENDABLE`. const struct pl_blend_params *blend_params; // The description of the vertex format, including offsets. // // Note: `location` is ignored and can safely be left unset. const struct pl_vertex_attrib *vertex_attribs; int num_vertex_attribs; size_t vertex_stride; // The index of the vertex position in `vertex_attribs`, as well as the // interpretation of its contents. int vertex_position_idx; enum pl_vertex_coords vertex_coords; bool vertex_flipped; // flip all vertex y coordinates // Type and number of vertices to render. enum pl_prim_type vertex_type; int vertex_count; // Vertex data. See `pl_pass_run_params.vertex_data`. const void *vertex_data; pl_buf vertex_buf; size_t buf_offset; // Index data. See `pl_pass_run_params.index_data`. Optional. const void *index_data; enum pl_index_format index_fmt; pl_buf index_buf; size_t index_offset; // If set, records the execution time of this dispatch into the given // timer object. Optional. // // Note: If this is set, `pl_dispatch` cannot internally measure the // execution time of the shader, which means `pl_dispatch_info.samples` may // be empty as a result. pl_timer timer; }; #define pl_dispatch_vertex_params(...) 
(&(struct pl_dispatch_vertex_params) { __VA_ARGS__ }) // Dispatch a generated shader using custom vertices, rather than using a quad // generated by the dispatch. This allows the use of e.g. custom fragment // shaders for things like rendering custom UI elements, or possibly doing // advanced things like sampling from a cube map or spherical video. bool pl_dispatch_vertex(pl_dispatch dp, const struct pl_dispatch_vertex_params *params); // Cancel an active shader without submitting anything. Useful, for example, // if the shader was instead merged into a different shader. void pl_dispatch_abort(pl_dispatch dp, pl_shader *sh); // Serialize the internal state of a `pl_dispatch` into an abstract cache // object that can be e.g. saved to disk and loaded again later. This function // will not truncate, so the buffer provided by the user must be large enough // to contain the entire output. Returns the number of bytes written to // `out_cache`, or the number of bytes that *would* have been written to // `out_cache` if it's NULL. size_t pl_dispatch_save(pl_dispatch dp, uint8_t *out_cache); // Load the result of a previous `pl_dispatch_save` call. This function will // never fail. It doesn't forget about any existing shaders, but merely // initializes an internal state cache needed to more efficiently compile // shaders that are not yet present in the `pl_dispatch`. // // Note: See the security warnings on `pl_pass_params.cached_program`. void pl_dispatch_load(pl_dispatch dp, const uint8_t *cache); PL_API_END #endif // LIBPLACEBO_DISPATCH_H libplacebo-v4.192.1/src/include/libplacebo/dither.h000066400000000000000000000030311417677245700221310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DITHER_H_ #define LIBPLACEBO_DITHER_H_ #include PL_API_BEGIN // Generates a deterministic NxN bayer (ordered) dither matrix, storing the // result in `data`. `size` must be a power of two. The resulting matrix will // be roughly uniformly distributed within the range [0,1). void pl_generate_bayer_matrix(float *data, int size); // Generates a random NxN blue noise texture. storing the result in `data`. // `size` must be a positive power of two no larger than 256. The resulting // texture will be roughly uniformly distributed within the range [0,1). // // Note: This function is very, *very* slow for large sizes. Generating a // dither matrix with size 256 can take several seconds on a modern processor. void pl_generate_blue_noise(float *data, int size); PL_API_END #endif // LIBPLACEBO_DITHER_H_ libplacebo-v4.192.1/src/include/libplacebo/dummy.h000066400000000000000000000151111417677245700220070ustar00rootroot00000000000000/* * This file is part of libplacebo. 
 * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. */ #ifndef LIBPLACEBO_DUMMY_H_ #define LIBPLACEBO_DUMMY_H_ #include <libplacebo/gpu.h> PL_API_BEGIN // The functions in this file allow creating and manipulating "dummy" contexts. // A dummy context isn't actually mapped by the GPU; all data exists purely on // the CPU. It also isn't capable of compiling or executing any shaders; any // attempts to do so will simply fail. // // The main use case for this dummy context is for users who want to generate // advanced shaders that depend on specific GLSL features or support for // certain types of GPU resources (e.g. LUTs). This dummy context allows such // shaders to be generated, with all of the referenced shader objects and // textures simply containing their data in a host-accessible way. struct pl_gpu_dummy_params { // These GPU parameters correspond to their equivalents in `pl_gpu`, and // must obey the same rules as documented there. The values from // `pl_gpu_dummy_default_params` are set to support pretty much everything // and are set for GLSL version 450. // // Individual fields such as `glsl.compute` or `glsl.version` // can and should be overridden by the user based on their requirements. // Individual limits should ideally be set based on the corresponding // `glGet` queries etc. struct pl_glsl_version glsl; struct pl_gpu_limits limits; }; #define PL_GPU_DUMMY_DEFAULTS \ .glsl = { \ .version = 450, \ .gles = false, \ .vulkan = false, \ .compute = true, \ .max_shmem_size = SIZE_MAX, \ .max_group_threads = 1024, \ .max_group_size = { 1024, 1024, 1024 }, \ .subgroup_size = 32, \ .min_gather_offset = INT16_MIN, \ .max_gather_offset = INT16_MAX, \ }, \ .limits = { \ /* pl_gpu */ \ .callbacks = false, \ .thread_safe = true, \ /* pl_buf */ \ .max_buf_size = SIZE_MAX, \ .max_ubo_size = SIZE_MAX, \ .max_ssbo_size = SIZE_MAX, \ .max_vbo_size = SIZE_MAX, \ .max_mapped_size = SIZE_MAX, \ .max_buffer_texels = UINT64_MAX, \ /* pl_tex */ \ .max_tex_1d_dim = UINT32_MAX, \ .max_tex_2d_dim = UINT32_MAX, \ .max_tex_3d_dim = UINT32_MAX, \ .buf_transfer = true, \ .align_tex_xfer_pitch = 1, \ .align_tex_xfer_offset = 1, \ /* pl_pass */ \ .max_variable_comps = SIZE_MAX, \ .max_constants = SIZE_MAX, \ .max_pushc_size = SIZE_MAX, \ .max_dispatch = { UINT32_MAX, UINT32_MAX, UINT32_MAX }, \ .fragment_queues = 0, \ .compute_queues = 0, \ }, #define pl_gpu_dummy_params(...) (&(struct pl_gpu_dummy_params) { PL_GPU_DUMMY_DEFAULTS __VA_ARGS__ }) extern const struct pl_gpu_dummy_params pl_gpu_dummy_default_params; // Create a dummy GPU context based on the given parameters. This GPU will have // a format for each host-representable type (i.e. intN_t, floats and doubles), // in the canonical channel order RGBA. These formats will have every possible // capability activated, respectively. // // If `params` is left as NULL, it defaults to `&pl_gpu_dummy_default_params`.
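// A minimal usage sketch (editorial, not part of the API itself): a dummy GPU
// can be created purely for offline shader generation. `log` is assumed to be
// a pl_log created elsewhere (see <libplacebo/log.h>):
//
//     pl_gpu gpu = pl_gpu_dummy_create(log, NULL); // NULL = default params
//     if (!gpu)
//         return;
//     // ... generate GLSL against `gpu` here (e.g. with the pl_shader API);
//     // note that compiling/executing shaders will fail by design ...
//     pl_gpu_dummy_destroy(&gpu);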
pl_gpu pl_gpu_dummy_create(pl_log log, const struct pl_gpu_dummy_params *params); void pl_gpu_dummy_destroy(pl_gpu *gpu); // Back-doors into the `pl_tex` and `pl_buf` representations. These allow you // to access the raw data backing this object. Textures are always laid out in // a tightly packed manner. // // For "placeholder" dummy textures, this always returns NULL. uint8_t *pl_buf_dummy_data(pl_buf buf); uint8_t *pl_tex_dummy_data(pl_tex tex); // Skeleton of `pl_tex_params` containing only the fields relevant to // `pl_tex_dummy_create`, plus the extra `sampler_type` field. struct pl_tex_dummy_params { int w, h, d; pl_fmt format; enum pl_sampler_type sampler_type; void *user_data; }; #define pl_tex_dummy_params(...) (&(struct pl_tex_dummy_params) { __VA_ARGS__ }) // Allows creating a "placeholder" dummy texture. This is basically a texture // that isn't even backed by anything. All `pl_tex_*` operations (other than // `pl_tex_destroy`) performed on it will simply fail. // // All of the permissions will be set to `false`, except `sampleable`, which is // set to `true`. (So you can use it as an input to shader sampling functions) pl_tex pl_tex_dummy_create(pl_gpu gpu, const struct pl_tex_dummy_params *params); PL_API_END #endif // LIBPLACEBO_DUMMY_H_ libplacebo-v4.192.1/src/include/libplacebo/filters.h000066400000000000000000000373151417677245700223360ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_FILTER_KERNELS_H_ #define LIBPLACEBO_FILTER_KERNELS_H_ #include #include PL_API_BEGIN #define PL_FILTER_MAX_PARAMS 2 // Represents a single filter function, i.e. kernel or windowing function. // To invoke a filter with a different configuration than the default, you can // make a copy of this struct and modify the non-const fields before passing it // to pl_filter_initialize. struct pl_filter_function { // These bools indicate whether or not `radius` and `params` may be // modified by the user. bool resizable; bool tunable[PL_FILTER_MAX_PARAMS]; // The underlying filter function itself: Computes the weight as a function // of the offset. All filter functions must be normalized such that x=0 is // the center point, and in particular weight(0) = 1.0. The functions may // be undefined for values of x outside [0, radius]. double (*weight)(const struct pl_filter_function *k, double x); // This field may be used to adjust the function's radius. Defaults to the // the radius needed to represent a single filter lobe (tap). If the // function is not resizable, this field must not be modified - otherwise // the result of filter evaluation is undefined. float radius; // These fields may be used to adjust the function. Defaults to the // function's preferred defaults. if the relevant setting is not tunable, // they are ignored entirely. 
float params[PL_FILTER_MAX_PARAMS]; }; bool pl_filter_function_eq(const struct pl_filter_function *a, const struct pl_filter_function *b); // Box filter: Entirely 1.0 within the radius, entirely 0.0 outside of it. // This is also sometimes called a Dirichlet window extern const struct pl_filter_function pl_filter_function_box; // Triangle filter: Linear transitions from 1.0 at x=0 to 0.0 at x=radius. // This is also sometimes called a Bartlett window. extern const struct pl_filter_function pl_filter_function_triangle; // Cosine filter: Ordinary cosine function, single lobe. extern const struct pl_filter_function pl_filter_function_cosine; // Hann function: Cosine filter named after Julius von Hann. Also commonly // mislabeled as a "Hanning" function, due to its similarity to the Hamming // function. extern const struct pl_filter_function pl_filter_function_hann; // Hamming function: Cosine filter named after Richard Hamming. extern const struct pl_filter_function pl_filter_function_hamming; // Welch filter: Polynomial function consisting of a single parabolic section. extern const struct pl_filter_function pl_filter_function_welch; // Kaiser filter: Approximation of the DPSS window using Bessel functions. // Also sometimes called a Kaiser-Bessel window. // Parameter [0]: Shape (alpha). Determines the trade-off between the main lobe // and the side lobes. extern const struct pl_filter_function pl_filter_function_kaiser; // Blackman filter: Cosine filter named after Ralph Beebe Blackman. // Parameter [0]: Scale (alpha). Influences the shape. The defaults result in // zeros at the third and fourth sidelobes. extern const struct pl_filter_function pl_filter_function_blackman; // Bohman filter: 2nd order Cosine filter. extern const struct pl_filter_function pl_filter_function_bohman; // Gaussian function: Similar to the Gaussian distribution, this defines a // bell curve function. // Parameter [0]: Scale (t), increasing makes the result blurrier. extern const struct pl_filter_function pl_filter_function_gaussian; // Quadratic function: 2nd order approximation of the gaussian function. Also // sometimes called a "quadric" window. extern const struct pl_filter_function pl_filter_function_quadratic; // Sinc function: Widely used for both kernels and windows, sinc(x) = sin(x)/x. extern const struct pl_filter_function pl_filter_function_sinc; // Jinc function: Similar to sinc, but extended to the 2D domain. Widely // used as the kernel of polar (EWA) filters. Also sometimes called a Sombrero // function. extern const struct pl_filter_function pl_filter_function_jinc; // Sphinx function: Similar to sinc and jinc, but extended to the 3D domain. // The name is derived from "spherical" sinc. Can be used to filter 3D signals // in theory. extern const struct pl_filter_function pl_filter_function_sphinx; // B/C-tunable Spline function: This is a family of commonly used spline // functions with two tunable parameters. Does not need to be windowed. // Parameter [0]: "B" // Parameter [1]: "C" // Due to its popularity, this function is available in several variants. // B = 0.0, C = 0.0: "base" bcspline, AKA Hermite spline (blocky) // B = 0.0, C = 0.5: Catmull-Rom filter (sharp) // B = 1/3, C = 1/3: Mitchell-Netravali filter (soft, doesn't ring) // B ≈ 0.37, C ≈ 0.31: Robidoux filter (used by ImageMagick) // B ≈ 0.26, C ≈ 0.37: RobidouxSharp filter.
(sharper variant of Robidoux) extern const struct pl_filter_function pl_filter_function_bcspline; extern const struct pl_filter_function pl_filter_function_catmull_rom; extern const struct pl_filter_function pl_filter_function_mitchell; extern const struct pl_filter_function pl_filter_function_robidoux; extern const struct pl_filter_function pl_filter_function_robidouxsharp; // Bicubic function: Very smooth and free of ringing, but very blurry. Does not // need to be windowed. extern const struct pl_filter_function pl_filter_function_bicubic; // Piecewise approximations of the Lanczos filter function (sinc-windowed // sinc). Referred to as "spline16", "spline36" and "spline64" mainly for // historical reasons, based on their fixed radii of 2, 3 and 4 (respectively). // These do not need to be windowed. extern const struct pl_filter_function pl_filter_function_spline16; extern const struct pl_filter_function pl_filter_function_spline36; extern const struct pl_filter_function pl_filter_function_spline64; struct pl_filter_function_preset { const char *name; const struct pl_filter_function *function; }; // A list of built-in filter function presets, terminated by {0} extern const struct pl_filter_function_preset pl_filter_function_presets[]; extern const int pl_num_filter_function_presets; // excluding trailing {0} // Find the filter function preset with the given name, or NULL on failure. const struct pl_filter_function_preset *pl_find_filter_function_preset(const char *name); // Backwards compatibility #define pl_named_filter_function pl_filter_function_preset #define pl_named_filter_functions pl_filter_function_presets #define pl_find_named_filter_function pl_find_filter_function_preset // Represents a particular configuration/combination of filter functions to // form a filter. struct pl_filter_config { const struct pl_filter_function *kernel; // The kernel function const struct pl_filter_function *window; // The windowing function. Optional // Represents a clamping coefficient for negative weights. A value of 0.0 // (the default) represents no clamping. A value of 1.0 represents full // clamping, i.e. all negative weights will be clamped to 0. Values in // between will be linearly scaled. float clamp; // Additional blur coefficient. This effectively stretches the kernel, // without changing the effective radius of the filter radius. Setting this // to a value of 0.0 is equivalent to disabling it. Values significantly // below 1.0 may seriously degrade the visual output, and should be used // with care. float blur; // Additional taper coefficient. This essentially flattens the function's // center. The values within [-taper, taper] will return 1.0, with the // actual function being squished into the remainder of [taper, radius]. // Defaults to 0.0. float taper; // If true, this filter is intended to be used as a polar/2D filter (EWA) // instead of a separable/1D filter. Does not affect the actual sampling, // but provides information about how the results are to be interpreted. bool polar; }; bool pl_filter_config_eq(const struct pl_filter_config *a, const struct pl_filter_config *b); // Samples a given filter configuration at a given x coordinate, while // respecting all parameters of the configuration. double pl_filter_sample(const struct pl_filter_config *c, double x); // A list of built-in filter configurations. Since they are just combinations // of the above filter functions, they are not described in much further // detail. 
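// By way of example (editorial addition, not in the original header), one of
// the presets listed below can be turned into a sampled LUT using the
// generator declared further down in this file; `log` is assumed to be a
// valid pl_log:
//
//     pl_filter flt = pl_filter_generate(log, pl_filter_params(
//         .config      = pl_filter_ewa_lanczos,
//         .lut_entries = 64,
//         .cutoff      = 1e-3f, // only meaningful for polar filters
//     ));
//     if (flt) {
//         // flt->weights now holds the sampled kernel
//         pl_filter_free(&flt);
//     }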
extern const struct pl_filter_config pl_filter_spline16; // 2 taps extern const struct pl_filter_config pl_filter_spline36; // 3 taps extern const struct pl_filter_config pl_filter_spline64; // 4 taps extern const struct pl_filter_config pl_filter_nearest; // AKA box extern const struct pl_filter_config pl_filter_bilinear; // AKA triangle extern const struct pl_filter_config pl_filter_gaussian; // Sinc family (all configured to 3 taps): extern const struct pl_filter_config pl_filter_sinc; // unwindowed, extern const struct pl_filter_config pl_filter_lanczos; // sinc-sinc extern const struct pl_filter_config pl_filter_ginseng; // sinc-jinc extern const struct pl_filter_config pl_filter_ewa_jinc; // unwindowed extern const struct pl_filter_config pl_filter_ewa_lanczos; // jinc-jinc extern const struct pl_filter_config pl_filter_ewa_ginseng; // jinc-sinc extern const struct pl_filter_config pl_filter_ewa_hann; // jinc-hann extern const struct pl_filter_config pl_filter_haasnsoft PL_DEPRECATED; // Spline family extern const struct pl_filter_config pl_filter_bicubic; extern const struct pl_filter_config pl_filter_catmull_rom; extern const struct pl_filter_config pl_filter_mitchell; extern const struct pl_filter_config pl_filter_mitchell_clamp; // clamp = 1.0 extern const struct pl_filter_config pl_filter_robidoux; extern const struct pl_filter_config pl_filter_robidouxsharp; extern const struct pl_filter_config pl_filter_ewa_robidoux; extern const struct pl_filter_config pl_filter_ewa_robidouxsharp; // Backwards compatibility #define pl_filter_box pl_filter_nearest #define pl_filter_triangle pl_filter_bilinear struct pl_filter_preset { const char *name; const struct pl_filter_config *filter; // Longer / friendly name, or NULL for aliases const char *description; }; // A list of built-in filter presets, terminated by {0} extern const struct pl_filter_preset pl_filter_presets[]; extern const int pl_num_filter_presets; // excluding trailing {0} // Find the filter preset with the given name, or NULL on failure. const struct pl_filter_preset *pl_find_filter_preset(const char *name); // Backwards compatibility #define pl_named_filter_config pl_filter_preset #define pl_named_filters pl_filter_presets #define pl_find_named_filter pl_find_filter_preset // Parameters for filter generation. struct pl_filter_params { // The particular filter configuration to be sampled. config.kernel must // be set to a valid pl_filter_function. struct pl_filter_config config; // The precision of the resulting LUT. A value of 64 should be fine for // most practical purposes, but higher or lower values may be justified // depending on the use case. This value must be set to something > 0. int lut_entries; // When set to values above 1.0, the filter will be computed at a size // larger than the radius would otherwise require, in order to prevent // aliasing when downscaling. In practice, this should be set to the // inverse of the scaling ratio, i.e. src_size / dst_size. float filter_scale; // --- polar filers only (config.polar) // As a micro-optimization, all samples below this cutoff value will be // ignored when updating the cutoff radius. Setting it to a value of 0.0 // disables this optimization. float cutoff; // --- separable filters only (!config.polar) // Indicates the maximum row size that is supported by the calling code, or // 0 for no limit. int max_row_size; // Indicates the row stride alignment. For some use cases (e.g. 
uploading // the weights as a texture), there are certain alignment requirements for // each row. The chosen row_size will always be a multiple of this value. // Specifying 0 indicates no alignment requirements. int row_stride_align; }; #define pl_filter_params(...) (&(struct pl_filter_params) { __VA_ARGS__ }) // Represents an initialized instance of a particular filter, with a // precomputed LUT. The interpretation of the LUT depends on the type of the // filter (polar or separable). typedef const PL_STRUCT(pl_filter) { // Deep copy of the parameters, for convenience. struct pl_filter_params params; // Contains the true radius of the computed filter. This may be // larger than `config.kernel->radius` depending on the `scale` passed to // pl_filter_generate. This is only relevant for polar filters, where it // affects the value range of *weights. float radius; // The computed look-up table (LUT). For polar filters, this is interpreted // as a 1D array with dimensions [lut_entries] containing the raw filter // samples on the scale [0, radius]. For separable (non-polar) filters, // this is interpreted as a 2D array with dimensions // [lut_entries][row_stride]. The inner rows contain the `row_size` samples // to convolve with the corresponding input pixels. The outer coordinate is // used to very the fractional offset (phase). So for example, if the // sample position to reconstruct is directly aligned with the source // texels, you would use the values from weights[0]. If the sample position // to reconstruct is exactly half-way between two source texels (180° out // of phase), you would use the values from weights[lut_entries/2]. const float *weights; // --- polar filters only (params.config.polar) // Contains the effective cut-off radius for this filter. Samples outside // of this cutoff radius may be discarded. Computed based on the `cutoff` // value specified at filter generation. Only relevant for polar filters // since skipping samples outside of the radius can be a significant // performance gain for EWA sampling. float radius_cutoff; // --- separable filters only (!params.config.polar) // The number of source texels to convolve over for each row. This value // will never exceed the given `max_row_size`. If the filter ends up // cut off because of this, the bool `insufficient` will be set to true. int row_size; bool insufficient; // The separation (in *weights) between each row of the filter. Always // a multiple of params.row_stride_align. int row_stride; } *pl_filter; // Generate (compute) a filter instance based on a given filter configuration. // The resulting pl_filter must be freed with `pl_filter_free` when no longer // needed. Returns NULL if filter generation fails due to invalid parameters // (i.e. missing a required parameter). pl_filter pl_filter_generate(pl_log log, const struct pl_filter_params *params); void pl_filter_free(pl_filter *filter); PL_API_END #endif // LIBPLACEBO_FILTER_KERNELS_H_ libplacebo-v4.192.1/src/include/libplacebo/gpu.h000066400000000000000000002001651417677245700214540ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_GPU_H_ #define LIBPLACEBO_GPU_H_ #include #include #include #include #include PL_API_BEGIN // These are not memory managed, and should represent compile-time constants typedef const char *pl_debug_tag; #define PL_DEBUG_TAG (__FILE__ ":" PL_TOSTRING(__LINE__)) // Type of a shader input descriptor. enum pl_desc_type { PL_DESC_INVALID = 0, PL_DESC_SAMPLED_TEX, // C: pl_tex* GLSL: combined texture sampler // (`pl_tex->params.sampleable` must be set) PL_DESC_STORAGE_IMG, // C: pl_tex* GLSL: storage image // (`pl_tex->params.storable` must be set) PL_DESC_BUF_UNIFORM, // C: pl_buf* GLSL: uniform buffer // (`pl_buf->params.uniform` must be set) PL_DESC_BUF_STORAGE, // C: pl_buf* GLSL: storage buffer // (`pl_buf->params.storable` must be set) PL_DESC_BUF_TEXEL_UNIFORM,// C: pl_buf* GLSL: uniform samplerBuffer // (`pl_buf->params.uniform` and `format` must be set) PL_DESC_BUF_TEXEL_STORAGE,// C: pl_buf* GLSL: uniform imageBuffer // (`pl_buf->params.uniform` and `format` must be set) PL_DESC_TYPE_COUNT }; // This file contains the definition of an API which is designed to abstract // away from platform-specific APIs like the various OpenGL variants, Direct3D // and Vulkan in a common way. It is a much more limited API than those APIs, // since it tries targeting a very small common subset of features that is // needed to implement libplacebo's rendering. // // NOTE: Most, but not all, parameter conditions (phrases such as "must" or // "valid usage" are explicitly tested and result in error messages followed by // graceful failure. Exceptions are noted where they exist. // Structure which wraps metadata describing GLSL capabilities. struct pl_glsl_version { int version; // GLSL version (e.g. 450), for #version bool gles; // GLSL ES semantics (ESSL) bool vulkan; // GL_KHR_vulkan_glsl semantics // Compute shader support and limits. If `compute` is false, then all // of the remaining fields in this section are {0}. bool compute; size_t max_shmem_size; // maximum compute shader shared memory size uint32_t max_group_threads; // maximum number of local threads per work group uint32_t max_group_size[3]; // maximum work group size per dimension // If nonzero, signals availability of shader subgroups. This guarantess // availability of all of the following extensions: // - GL_KHR_shader_subgroup_basic // - GL_KHR_shader_subgroup_vote // - GL_KHR_shader_subgroup_arithmetic // - GL_KHR_shader_subgroup_ballot // - GL_KHR_shader_subgroup_shuffle uint32_t subgroup_size; // Miscellaneous shader limits int16_t min_gather_offset; // minimum `textureGatherOffset` offset int16_t max_gather_offset; // maximum `textureGatherOffset` offset }; // Backwards compatibility alias #define pl_glsl_desc pl_glsl_version // Structure defining the physical limits and capabilities of this GPU // instance. If a limit is given as 0, that means that feature is unsupported. 
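//
// A minimal sketch of how these limits are typically consulted (assuming a
// valid `gpu` handle and an application-chosen `size`):
//
//     if (size <= gpu->limits.max_buf_size) {
//         // safe to request a pl_buf of this size
//     }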
struct pl_gpu_limits { // --- pl_gpu bool thread_safe; // `pl_gpu` calls are thread-safe bool callbacks; // supports asynchronous GPU callbacks // --- pl_buf size_t max_buf_size; // maximum size of any buffer size_t max_ubo_size; // maximum size of a `uniform` buffer size_t max_ssbo_size; // maximum size of a `storable` buffer size_t max_vbo_size; // maximum size of a `drawable` buffer size_t max_mapped_size; // maximum size of a `host_mapped` buffer uint64_t max_buffer_texels; // maximum number of texels in a texel buffer // Required alignment for PL_HANDLE_HOST_PTR imports. This is provided // merely as a hint to the user. If the host pointer being imported is // misaligned, libplacebo will internally round (over-map) the region. size_t align_host_ptr; // --- pl_tex uint32_t max_tex_1d_dim; // maximum width for a 1D texture uint32_t max_tex_2d_dim; // maximum width/height for a 2D texture (required) uint32_t max_tex_3d_dim; // maximum width/height/depth for a 3D texture bool blittable_1d_3d; // supports blittable 1D/3D textures bool buf_transfer; // supports `pl_tex_transfer_params.buf` // These don't represent hard limits but indicate performance hints for // optimal alignment. For best performance, the corresponding field // should be aligned to a multiple of these. They will always be a power // of two. size_t align_tex_xfer_pitch; // optimal `pl_tex_transfer_params.row_pitch` size_t align_tex_xfer_offset; // optimal `pl_tex_transfer_params.buf_offset` // --- pl_pass size_t max_variable_comps; // maximum components passed in variables size_t max_constants; // maximum `pl_pass_params.num_constants` size_t max_pushc_size; // maximum `push_constants_size` size_t align_vertex_stride; // alignment of `pl_pass_params.vertex_stride` uint32_t max_dispatch[3]; // maximum dispatch size per dimension // Note: At least one of `max_variable_comps` or `max_ubo_size` is // guaranteed to be nonzero. // As a performance hint, the GPU may signal the number of command queues // it has for fragment and compute shaders, respectively. Users may use // this information to decide the appropriate type of shader to dispatch. uint32_t fragment_queues; uint32_t compute_queues; // --- Deprecated fields. Provided for backwards compatibility. See the // corresponding fields in `pl_glsl_version` for their replacements. size_t max_shmem_size PL_DEPRECATED; uint32_t max_group_threads PL_DEPRECATED; uint32_t max_group_size[3] PL_DEPRECATED; uint32_t subgroup_size PL_DEPRECATED; int16_t min_gather_offset PL_DEPRECATED; int16_t max_gather_offset PL_DEPRECATED; size_t max_variables PL_DEPRECATED; // see `max_variable_comps` }; // Backwards compatibility aliases #define max_xfer_size max_buf_size #define align_tex_xfer_stride align_tex_xfer_pitch // Some `pl_gpu` operations allow sharing GPU resources with external APIs - // examples include interop with other graphics APIs such as CUDA, and also // various hardware decoding APIs. This defines the mechanism underpinning the // communication of such an interoperation. 
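//
// As an illustration (using only names from the surrounding API), code that
// wants to export a buffer as a POSIX file descriptor would first check the
// advertised capability bits:
//
//     if (gpu->export_caps.buf & PL_HANDLE_FD) {
//         // PL_HANDLE_FD may be requested at buffer creation time
//     }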
typedef uint64_t pl_handle_caps; enum pl_handle_type { PL_HANDLE_FD = (1 << 0), // `int fd` for POSIX-style APIs PL_HANDLE_WIN32 = (1 << 1), // `HANDLE` for win32 API PL_HANDLE_WIN32_KMT = (1 << 2), // `HANDLE` for pre-Windows-8 win32 API PL_HANDLE_DMA_BUF = (1 << 3), // 'int fd' for a dma_buf fd PL_HANDLE_HOST_PTR = (1 << 4), // `void *` for a host-allocated pointer }; struct pl_gpu_handle_caps { pl_handle_caps tex; // supported handles for `pl_tex` + `pl_shared_mem` pl_handle_caps buf; // supported handles for `pl_buf` + `pl_shared_mem` pl_handle_caps sync; // supported handles for `pl_sync` }; // Wrapper for the handle used to communicate a shared resource externally. // This handle is owned by the `pl_gpu` - if a user wishes to use it in a way // that takes over ownership (e.g. importing into some APIs), they must clone // the handle before doing so (e.g. using `dup` for fds). It is important to // read the external API documentation _very_ carefully as different handle // types may be managed in different ways. (eg: CUDA takes ownership of an fd, // but does not take ownership of a win32 handle). union pl_handle { int fd; // PL_HANDLE_FD / PL_HANDLE_DMA_BUF void *handle; // PL_HANDLE_WIN32 / PL_HANDLE_WIN32_KMT void *ptr; // PL_HANDLE_HOST_PTR }; // Structure encapsulating memory that is shared between libplacebo and the // user. This memory can be imported into external APIs using the handle. // // If the object a `pl_shared_mem` belongs to is destroyed (e.g. via // `pl_buf_destroy`), the handle becomes undefined, as do the contents of the // memory it points to, as well as any external API objects imported from it. struct pl_shared_mem { union pl_handle handle; size_t size; // the total size of the memory referenced by this handle size_t offset; // the offset of the object within the referenced memory // Note: `size` is optional for some APIs and handle types, in particular // when importing DMABUFs or D3D11 textures. // For PL_HANDLE_DMA_BUF, this specifies the DRM format modifier that // describes this resource. Note that when importing `pl_buf`, this must // be DRM_FORMAT_MOD_LINEAR. For importing `pl_tex`, it can be any // format modifier supported by the implementation. uint64_t drm_format_mod; // When importing a `pl_tex` of type PL_HANDLE_DMA_BUF, this can be used to // set the image stride (AKA pitch) in memory. If left as 0, defaults to // the image width/height. size_t stride_w; size_t stride_h; }; // Structure grouping PCI bus address fields for GPU devices struct pl_gpu_pci_address { uint32_t domain; uint32_t bus; uint32_t device; uint32_t function; }; // (Deprecated) Capability bits. Provided for backwards compatibility. typedef uint64_t pl_gpu_caps; enum PL_DEPRECATED { PL_GPU_CAP_COMPUTE = 1 << 0, // see `pl_glsl_version.compute` PL_GPU_CAP_PARALLEL_COMPUTE = 1 << 1, // see `pl_gpu_limits.compute_queues` PL_GPU_CAP_INPUT_VARIABLES = 1 << 2, // see `pl_gpu_limits.max_variable_comps` PL_GPU_CAP_MAPPED_BUFFERS = 1 << 3, // see `pl_gpu_limits.max_mapped_size` PL_GPU_CAP_BLITTABLE_1D_3D = 1 << 4, // see `pl_gpu_limits.blittable_1d_3d` PL_GPU_CAP_SUBGROUPS = 1 << 5, // see `pl_glsl_version.subgroup_size` PL_GPU_CAP_CALLBACKS = 1 << 6, // see `pl_gpu_limits.callbacks` PL_GPU_CAP_THREAD_SAFE = 1 << 7, // see `pl_gpu_limits.thread_safe` PL_GPU_CAP_SPEC_CONSTANTS = 1 << 8, // see `pl_gpu_limits.max_constants` }; typedef const PL_STRUCT(pl_fmt) *pl_fmt; // Abstract device context which wraps an underlying graphics context and can // be used to dispatch rendering commands. 
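//
// Purely as an illustration, typical start-up queries against this object
// might look like the following (the `gpu` handle itself comes from one of
// the API-specific backends, e.g. Vulkan or OpenGL):
//
//     pl_fmt rgba8 = pl_find_fmt(gpu, PL_FMT_UNORM, 4, 8, 8, PL_FMT_CAP_SAMPLEABLE);
//     bool compute = gpu->glsl.compute;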
// // Thread-safety: Depends on `pl_gpu_limits.thread_safe` typedef const PL_STRUCT(pl_gpu) { pl_log log; struct pl_glsl_version glsl; // GLSL features supported by this GPU struct pl_gpu_limits limits; // physical device limits and capabilities // Fields relevant to external API interop. If the underlying device does // not support interop with other APIs, these will all be {0}. struct pl_gpu_handle_caps export_caps; // supported handles for exporting struct pl_gpu_handle_caps import_caps; // supported handles for importing uint8_t uuid[16]; // underlying device UUID // Supported texture formats, in preference order. (If there are multiple // similar formats, the "better" ones come first) pl_fmt *formats; int num_formats; // PCI Bus address of the underlying device, to help with interop. // This will only be filled in if interop is supported. struct pl_gpu_pci_address pci; // (Deprecated) Backwards compatibility fields. pl_log ctx PL_DEPRECATED; // equal to `log` pl_gpu_caps caps PL_DEPRECATED; // replaced by `glsl` and `limits` } *pl_gpu; // No longer functional. See `pl_gpu_limits.align_tex_xfer_pitch`. PL_DEPRECATED static inline int pl_optimal_transfer_stride(pl_gpu _gpu, int dim) { (void) _gpu; return dim; } enum pl_fmt_type { PL_FMT_UNKNOWN = 0, // also used for inconsistent multi-component formats PL_FMT_UNORM, // unsigned, normalized integer format (sampled as float) PL_FMT_SNORM, // signed, normalized integer format (sampled as float) PL_FMT_UINT, // unsigned integer format (sampled as integer) PL_FMT_SINT, // signed integer format (sampled as integer) PL_FMT_FLOAT, // (signed) float formats, any bit size PL_FMT_TYPE_COUNT, }; enum pl_fmt_caps { PL_FMT_CAP_SAMPLEABLE = 1 << 0, // may be sampled from (PL_DESC_SAMPLED_TEX) PL_FMT_CAP_STORABLE = 1 << 1, // may be used as storage image (PL_DESC_STORAGE_IMG) PL_FMT_CAP_LINEAR = 1 << 2, // may be linearly samplied from (PL_TEX_SAMPLE_LINEAR) PL_FMT_CAP_RENDERABLE = 1 << 3, // may be rendered to (pl_pass_params.target_fmt) PL_FMT_CAP_BLENDABLE = 1 << 4, // may be blended to (pl_pass_params.enable_blend) PL_FMT_CAP_BLITTABLE = 1 << 5, // may be blitted from/to (pl_tex_blit) PL_FMT_CAP_VERTEX = 1 << 6, // may be used as a vertex attribute PL_FMT_CAP_TEXEL_UNIFORM = 1 << 7, // may be used as a texel uniform buffer PL_FMT_CAP_TEXEL_STORAGE = 1 << 8, // may be used as a texel storage buffer PL_FMT_CAP_HOST_READABLE = 1 << 9, // may be used with `host_readable` textures PL_FMT_CAP_READWRITE = 1 << 10, // may be used with PL_DESC_ACCESS_READWRITE // Notes: // - PL_FMT_CAP_LINEAR also implies PL_FMT_CAP_SAMPLEABLE // - PL_FMT_CAP_STORABLE also implies `pl_gpu.glsl.compute` // - PL_FMT_CAP_BLENDABLE implies PL_FMT_CAP_RENDERABLE // - PL_FMT_CAP_VERTEX implies that the format is non-opaque // - PL_FMT_CAP_HOST_READABLE implies that the format is non-opaque }; // Structure describing a texel/vertex format. PL_STRUCT(pl_fmt) { const char *name; // symbolic name for this format (e.g. rgba32f) uint64_t signature; // unique but stable signature (for pass reusability) enum pl_fmt_type type; // the format's data type and interpretation enum pl_fmt_caps caps; // the features supported by this format int num_components; // number of components for this format int component_depth[4]; // meaningful bits per component, texture precision size_t internal_size; // internal texel size (for blit compatibility) // This controls the relationship between the data as seen by the host and // the way it's interpreted by the texture. 
The host representation is // always tightly packed (no padding bits in between each component). // // This representation assumes little endian ordering, i.e. components // being ordered from LSB to MSB in memory. Note that for oddly packed // formats like rgb10a2 or rgb565, this is inconsistent with the naming. // (That is to say, rgb565 has sample order {2, 1, 0} under this convention // - because rgb565 treats the R channel as the *most* significant bits) // // If `opaque` is true, then there's no meaningful correspondence between // the two, and all of the remaining fields in this section are unset. // // If `emulated` is true, then this format doesn't actually exist on the // GPU as an uploadable texture format - and any apparent support is being // emulated (typically using compute shaders in the upload path). bool opaque; bool emulated; size_t texel_size; // total size in bytes per texel size_t texel_align; // texel alignment requirements (bytes) int host_bits[4]; // number of meaningful bits in host memory int sample_order[4]; // sampled index for each component, e.g. // {2, 1, 0, 3} for BGRA textures // For sampleable formats, this bool indicates whether or not the format // is compatible with `textureGather()` bool gatherable; // If usable as a vertex or texel buffer format, this gives the GLSL type // corresponding to the data. (e.g. vec4) const char *glsl_type; // If usable as a storage image or texel storage buffer // (PL_FMT_CAP_STORABLE / PL_FMT_CAP_TEXEL_STORAGE), this gives the GLSL // texel format corresponding to the format (e.g. rgba16ui), if any. This // field may be NULL, in which case the format modifier may be left // unspecified. const char *glsl_format; // If non-opaque, this gives the fourcc associated with the host // representation. In particular, this is intended for use with // PL_HANDLE_DMA_BUF, where this field will match the DRM format from // . May be 0, for formats without matching DRM fourcc. uint32_t fourcc; // If `fourcc` is set, this contains the list of supported drm format // modifiers for this format. const uint64_t *modifiers; int num_modifiers; }; // Returns whether or not a pl_fmt's components are ordered sequentially // in memory in the order RGBA. bool pl_fmt_is_ordered(pl_fmt fmt); // Returns whether or not a pl_fmt is sampled as a float (e.g. UNORM) bool pl_fmt_is_float(pl_fmt fmt); // Returns whether or not a pl_fmt supports a given DRM modifier. bool pl_fmt_has_modifier(pl_fmt fmt, uint64_t modifier); // Helper function to find a format with a given number of components and // minimum effective precision per component. If `host_bits` is set, then the // format will always be non-opaque, unpadded, ordered and have exactly this // bit depth for each component. Finally, all `caps` must be supported. pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components, int min_depth, int host_bits, enum pl_fmt_caps caps); // Finds a vertex format for a given configuration. The resulting vertex will // have a component depth equivalent to the sizeof() the equivalent host type. // (e.g. PL_FMT_FLOAT will always have sizeof(float)) pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components); // Find a format based on its name. pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name); // Find a format based on its fourcc. pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc); // A generic 'timer query' object. These can be used to measure an // approximation of the GPU execution time of a given operation. 
Due to the // highly asynchronous nature of GPUs, the actual results of any individual // timer query may be delayed by quite a bit. As such, users should avoid // trying to pair any particular GPU command with any particular timer query // result, and only reuse `pl_timer` objects with identical operations. The // results of timer queries are guaranteed to be in-order, but individual // queries may be dropped, and some operations might not record timer results // at all. (For example, if the underlying hardware does not support timer // queries for a given operation type) // // Thread-safety: Unsafe typedef PL_STRUCT(pl_timer) *pl_timer; // Creates a new timer object. This may return NULL, for example if the // implementation does not support timers, but since passing NULL to // `pl_timer_destroy` and `pl_timer_query` is safe, users generally need not // concern themselves with handling this. pl_timer pl_timer_create(pl_gpu gpu); void pl_timer_destroy(pl_gpu gpu, pl_timer *); // Queries any results that have been measured since the last execution of // `pl_timer_query`. There may be more than one result, in which case the user // should simply call the function again to get the subsequent values. This // function returns a value of 0 in the event that there are no more // unprocessed results. // // The results are reported in nanoseconds, but the actual precision of the // timestamp queries may be significantly lower. // // Note: Results do not queue up indefinitely. Generally, the implementation // will only keep track of a small, fixed number of results internally. Make // sure to include this function as part of your main rendering loop to process // all of its results, or older results will be overwritten by newer ones. uint64_t pl_timer_query(pl_gpu gpu, pl_timer); enum pl_buf_mem_type { PL_BUF_MEM_AUTO = 0, // use whatever seems most appropriate PL_BUF_MEM_HOST, // try allocating from host memory (RAM) PL_BUF_MEM_DEVICE, // try allocating from device memory (VRAM) PL_BUF_MEM_TYPE_COUNT, // Note: This distinction only matters for discrete GPUs }; // Structure describing a buffer. struct pl_buf_params { size_t size; // size in bytes (must be <= `pl_gpu_limits.max_buf_size`) bool host_writable; // contents may be updated via pl_buf_write() bool host_readable; // contents may be read back via pl_buf_read() bool host_mapped; // create a persistent, RW mapping (pl_buf.data) // May be used as PL_DESC_BUF_UNIFORM or PL_DESC_BUF_TEXEL_UNIFORM. // Requires `size <= pl_gpu_limits.max_ubo_size` bool uniform; // May be used as PL_DESC_BUF_STORAGE or PL_DESC_BUF_TEXEL_STORAGE. // Requires `size <= pl_gpu_limits.max_ssbo_size` bool storable; // May be used as the source of vertex data for `pl_pass_run`. bool drawable; // Provide a hint for the memory type you want to use when allocating // this buffer's memory. // // Note: Restrictions may apply depending on the usage flags. In // particular, allocating buffers with `uniform` or `storable` enabled from // non-device memory will almost surely fail. enum pl_buf_mem_type memory_type; // Setting this to a format with the `PL_FMT_CAP_TEXEL_*` capability allows // this buffer to be used as a `PL_DESC_BUF_TEXEL_*`, when `uniform` and // `storage` are respectively also enabled. pl_fmt format; // At most one of `export_handle` and `import_handle` can be set for a // buffer. // Setting this indicates that the memory backing this buffer should be // shared with external APIs, If so, this must be exactly *one* of // `pl_gpu.export_caps.buf`. 
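// (For instance, a buffer meant to be shared via DMA-BUF would - purely
// illustratively - request `.export_handle = PL_HANDLE_DMA_BUF` here,
// provided that bit is present in `pl_gpu.export_caps.buf`.)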
enum pl_handle_type export_handle; // Setting this indicates that the memory backing this buffer will be // imported from an external API. If so, this must be exactly *one* of // `pl_gpu.import_caps.buf`. enum pl_handle_type import_handle; // If the shared memory is being imported, the import handle must be // specified here. Otherwise, this is ignored. struct pl_shared_mem shared_mem; // If non-NULL, the buffer will be created with these contents. Otherwise, // the initial data is undefined. Using this does *not* require setting // host_writable. const void *initial_data; // Arbitrary user data. libplacebo does not use this at all. void *user_data; // Arbitrary identifying tag. Used only for debugging purposes. pl_debug_tag debug_tag; }; #define pl_buf_params(...) (&(struct pl_buf_params) { \ .debug_tag = PL_DEBUG_TAG, \ __VA_ARGS__ \ }) // A generic buffer, which can be used for multiple purposes (texture transfer, // storage buffer, uniform buffer, etc.) // // Note on efficiency: A pl_buf does not necessarily represent a true "buffer" // object on the underlying graphics API. It may also refer to a sub-slice of // a larger buffer, depending on the implementation details of the GPU. The // bottom line is that users do not need to worry about the efficiency of using // many small pl_buf objects. Having many small pl_bufs, even lots of few-byte // vertex buffers, is designed to be completely fine. // // Thread-safety: Unsafe typedef const PL_STRUCT(pl_buf) { struct pl_buf_params params; uint8_t *data; // for persistently mapped buffers, points to the first byte // If `params.handle_type` is set, this structure references the shared // memory backing this buffer, via the requested handle type. // // While this buffer is not in an "exported" state, the contents of the // memory are undefined. (See: `pl_buf_export`) struct pl_shared_mem shared_mem; } *pl_buf; // Create a buffer. The type of buffer depends on the parameters. The buffer // parameters must adhere to the restrictions imposed by the pl_gpu_limits. // Returns NULL on failure. // // For buffers with shared memory, the buffer is considered to be in an // "exported" state by default, and may be used directly by the external API // after being created (until the first libplacebo operation on the buffer). pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *params); void pl_buf_destroy(pl_gpu gpu, pl_buf *buf); // This behaves like `pl_buf_create`, but if the buffer already exists and has // incompatible parameters, it will get destroyed first. A buffer is considered // "compatible" if it has the same buffer type and texel format, a size greater // than or equal to the requested size, and it has a superset of the features // the user requested. After this operation, the contents of the buffer are // undefined. // // Note: Due to its unpredictability, it's not allowed to use this with // `params->initial_data` being set. Similarly, it's not allowed on a buffer // with `params->export_handle`. since this may invalidate the corresponding // external API's handle. Conversely, it *is* allowed on a buffer with // `params->host_mapped`, and the corresponding `buf->data` pointer *may* // change as a result of doing so. // // Note: If the `user_data` alone changes, this does not trigger a buffer // recreation. In theory, this can be used to detect when the buffer ended // up being recreated. 
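//
// Illustrative usage (assuming a persistently held `pl_buf buf`, grown on
// demand; `needed_size` and `data` are application-provided):
//
//     if (pl_buf_recreate(gpu, &buf, pl_buf_params(
//             .size          = needed_size,
//             .host_writable = true,
//         )))
//     {
//         pl_buf_write(gpu, buf, 0, data, needed_size);
//     }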
bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *params); // Update the contents of a buffer, starting at a given offset (must be a // multiple of 4) and up to a given size, with the contents of *data. // // This function will block until the buffer is no longer in use. Use // `pl_buf_poll` to perform non-blocking queries of buffer availability. // // Note: This function can incur synchronization overhead, so it shouldn't be // used in tight loops. If you do need to loop (e.g. to perform a strided // write), consider using host-mapped buffers, or fixing the memory in RAM, // before calling this function. void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset, const void *data, size_t size); // Read back the contents of a buffer, starting at a given offset, storing the // data into *dest. Returns whether successful. // // This function will block until the buffer is no longer in use. Use // `pl_buf_poll` to perform non-blocking queries of buffer availability. bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset, void *dest, size_t size); // Copy `size` bytes from one buffer to another, reading from and writing to // the respective offsets. void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size); // Initiates a buffer export operation, allowing a buffer to be accessed by an // external API. This is only valid for buffers with `params.handle_type`. // Calling this twice in a row is a harmless no-op. Returns whether successful. // // There is no corresponding "buffer import" operation, the next libplacebo // operation that touches the buffer (e.g. pl_tex_upload, but also pl_buf_write // and pl_buf_read) will implicitly import the buffer back to libplacebo. Users // must ensure that all pending operations made by the external API are fully // completed before using it in libplacebo again. (Otherwise, the behaviour // is undefined) // // Please note that this function returning does not mean the memory is // immediately available as such. In general, it will mark a buffer as "in use" // in the same way any other buffer operation would, and it is the user's // responsibility to wait until `pl_buf_poll` returns false before accessing // the memory from the external API. // // In terms of the access performed by this operation, it is not considered a // "read" or "write" and therefore does not technically conflict with reads or // writes to the buffer performed by the host (via mapped memory - any use of // `pl_buf_read` or `pl_buf_write` would defeat the purpose of the export). // However, restrictions made by the external API may apply that prevent this. // // The recommended use pattern is something like this: // // while (loop) { // pl_buf buf = get_free_buffer(); // or block on pl_buf_poll // // write to the buffer using the external API // pl_tex_upload(gpu, /* ... buf ... */); // implicitly imports // pl_buf_export(gpu, buf); // } // // i.e. perform an external API operation, then use and immediately export the // buffer in libplacebo, and finally wait until `pl_buf_poll` is false before // re-using it in the external API. (Or get a new buffer in the meantime) bool pl_buf_export(pl_gpu gpu, pl_buf buf); // Returns whether or not a buffer is currently "in use". This can either be // because of a pending read operation, a pending write operation or a pending // buffer export operation. 
Any access to the buffer by external APIs or via // the host pointer (for host-mapped buffers) is forbidden while a buffer is // "in use". The only exception to this rule is multiple reads, for example // reading from a buffer with `pl_tex_upload` while simultaneously reading from // it using mapped memory. // // The `timeout`, specified in nanoseconds, indicates how long to block for // before returning. If set to 0, this function will never block, and only // returns the current status of the buffer. The actual precision of the // timeout may be significantly longer than one nanosecond, and has no upper // bound. This function does not provide hard latency guarantees. This function // may also return at any time, even if the buffer is still in use. If the user // wishes to block until the buffer is definitely no longer in use, the // recommended usage is: // // while (pl_buf_poll(gpu, buf, UINT64_MAX)) // ; // do nothing // // Note: libplacebo operations on buffers are always internally synchronized, // so this is only needed for host-mapped or externally exported buffers. // However, it may be used to do non-blocking queries before calling blocking // functions such as `pl_buf_read`. // // Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly // synchronized, meaning it can safely be called on a `pl_buf` that is in use // by another thread. bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout); enum pl_tex_sample_mode { PL_TEX_SAMPLE_NEAREST, // nearest neighbour sampling PL_TEX_SAMPLE_LINEAR, // linear filtering, requires PL_FMT_CAP_LINEAR PL_TEX_SAMPLE_MODE_COUNT, }; enum pl_tex_address_mode { PL_TEX_ADDRESS_CLAMP, // clamp the nearest edge texel PL_TEX_ADDRESS_REPEAT, // repeat (tile) the texture PL_TEX_ADDRESS_MIRROR, // repeat (mirror) the texture PL_TEX_ADDRESS_MODE_COUNT, }; // Structure describing a texture. struct pl_tex_params { int w, h, d; // physical dimension; unused dimensions must be 0 pl_fmt format; // The following bools describe what operations can be performed. The // corresponding pl_fmt capability must be set for every enabled // operation type. bool sampleable; // usable as a PL_DESC_SAMPLED_TEX bool renderable; // usable as a render target (pl_pass_run) // (must only be used with 2D textures) bool storable; // usable as a storage image (PL_DESC_IMG_*) bool blit_src; // usable as a blit source bool blit_dst; // usable as a blit destination bool host_writable; // may be updated with pl_tex_upload() bool host_readable; // may be fetched with pl_tex_download() // Note: For `blit_src`, `blit_dst`, the texture must either be // 2-dimensional or `pl_gpu_limits.blittable_1d_3d` must be set. // At most one of `export_handle` and `import_handle` can be set for a // texture. // Setting this indicates that the memory backing this texture should be // shared with external APIs, If so, this must be exactly *one* of // `pl_gpu.export_caps.tex`. enum pl_handle_type export_handle; // Setting this indicates that the memory backing this texture will be // imported from an external API. If so, this must be exactly *one* of // `pl_gpu.import_caps.tex`. Mutually exclusive with `initial_data`. enum pl_handle_type import_handle; // If the shared memory is being imported, the import handle must be // specified here. Otherwise, this is ignored. struct pl_shared_mem shared_mem; // If non-NULL, the texture will be created with these contents (tightly // packed). Using this does *not* require setting host_writable. Otherwise, // the initial data is undefined. 
Mutually exclusive with `import_handle`. const void *initial_data; // Arbitrary user data. libplacebo does not use this at all. void *user_data; // Arbitrary identifying tag. Used only for debugging purposes. pl_debug_tag debug_tag; }; #define pl_tex_params(...) (&(struct pl_tex_params) { \ .debug_tag = PL_DEBUG_TAG, \ __VA_ARGS__ \ }) static inline int pl_tex_params_dimension(const struct pl_tex_params params) { return params.d ? 3 : params.h ? 2 : 1; } enum pl_sampler_type { PL_SAMPLER_NORMAL, // gsampler2D, gsampler3D etc. PL_SAMPLER_RECT, // gsampler2DRect PL_SAMPLER_EXTERNAL, // gsamplerExternalOES PL_SAMPLER_TYPE_COUNT, }; // Conflates the following typical GPU API concepts: // - texture itself // - sampler state // - staging buffers for texture upload // - framebuffer objects // - wrappers for swapchain framebuffers // - synchronization needed for upload/rendering/etc. // // Essentially a pl_tex can be anything ranging from a normal texture, a wrapped // external/real framebuffer, a framebuffer object + texture pair, a mapped // texture (via pl_hwdec), or other sorts of things that can be sampled from // and/or rendered to. // // Thread-safety: Unsafe typedef const PL_STRUCT(pl_tex) { struct pl_tex_params params; // If `params.export_handle` is set, this structure references the shared // memory backing this buffer, via the requested handle type. // // While this texture is not in an "exported" state, the contents of the // memory are undefined. (See: `pl_tex_export`) // // Note: Due to vulkan driver limitations, `shared_mem.drm_format_mod` will // currently always be set to DRM_FORMAT_MOD_INVALID. No guarantee can be // made about the cross-driver compatibility of textures exported this way. struct pl_shared_mem shared_mem; // If `params.sampleable` is true, this indicates the correct sampler type // to use when sampling from this texture. enum pl_sampler_type sampler_type; } *pl_tex; // Create a texture (with undefined contents). Returns NULL on failure. This is // assumed to be an expensive/rare operation, and may need to perform memory // allocation or framebuffer creation. pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params); void pl_tex_destroy(pl_gpu gpu, pl_tex *tex); // This works like `pl_tex_create`, but if the texture already exists and has // incompatible texture parameters, it will get destroyed first. A texture is // considered "compatible" if it has the same texture format and sample/address // mode and it supports a superset of the features the user requested. // // Even if the texture is not recreated, calling this function will still // invalidate the contents of the texture. (Note: Because of this, // `initial_data` may not be used with `pl_tex_recreate`. Doing so is an error) // // Note: If the `user_data` alone changes, this does not trigger a texture // recreation. In theory, this can be used to detect when the texture ended // up being recreated. bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params); // Invalidates the contents of a texture. After this, the contents are fully // undefined. void pl_tex_invalidate(pl_gpu gpu, pl_tex tex); union pl_clear_color { float f[4]; int32_t i[4]; uint32_t u[4]; }; // Clear the dst texture with the given color (rgba). This is functionally // identical to a blit operation, which means `dst->params.blit_dst` must be // set. void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color); // Wrapper for `pl_tex_clear_ex` which only works for floating point textures. 
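//
// For example (illustrative only), clearing a render target to opaque black,
// assuming `tex` was created with `blit_dst` enabled:
//
//     pl_tex_clear(gpu, tex, (float[4]) { 0.0, 0.0, 0.0, 1.0 });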
void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4]); struct pl_tex_blit_params { // The texture to blit from. Must have `params.blit_src` enabled. pl_tex src; // The texture to blit to. Must have `params.blit_dst` enabled, and a // format that is loosely compatible with `src`. This essentially means // that they must have the same `internal_size`. Additionally, UINT // textures can only be blitted to other UINT textures, and SINT textures // can only be blitted to other SINT textures. pl_tex dst; // The region of the source texture to blit. Must be within the texture // bounds of `src`. May be flipped. (Optional) struct pl_rect3d src_rc; // The region of the destination texture to blit into. Must be within the // texture bounds of `dst`. May be flipped. Areas outside of `dst_rc` in // `dst` are preserved. (Optional) struct pl_rect3d dst_rc; // If `src_rc` and `dst_rc` have different sizes, the texture will be // scaled using the given texture sampling mode. enum pl_tex_sample_mode sample_mode; }; #define pl_tex_blit_params(...) (&(struct pl_tex_blit_params) { __VA_ARGS__ }) // Copy a sub-rectangle from one texture to another. void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params); // Structure describing a texture transfer operation. struct pl_tex_transfer_params { // Texture to transfer to/from. Depending on the type of the operation, // this must have params.host_writable (uploads) or params.host_readable // (downloads) set, respectively. pl_tex tex; // Note: Superfluous parameters are ignored, i.e. for a 1D texture, the y // and z fields of `rc`, as well as the corresponding pitches, are ignored. // In all other cases, the pitch must be large enough to contain the // corresponding dimension of `rc`, and the `rc` must be normalized and // fully contained within the image dimensions. Missing fields in the `rc` // are inferred from the image size. If unset, the pitch is inferred // from `rc` (that is, it's assumed that the data is tightly packed in the // buffer). Otherwise, `row_pitch` *must* be a multiple of // `tex->params.format->texel_align`, and `depth_pitch` must be a multiple // of `row_pitch`. struct pl_rect3d rc; // region of the texture to transfer size_t row_pitch; // the number of bytes separating image rows size_t depth_pitch; // the number of bytes separating image planes // Deprecated variants of `row_pitch` and `depth_pitch` for backwards // compatibility with older versions of libplacebo. Avoid using. unsigned int stride_w PL_DEPRECATED; unsigned int stride_h PL_DEPRECATED; // An optional timer to report the approximate duration of the texture // transfer to. Note that this is only an approximation, since the actual // texture transfer may happen entirely in the background (in particular, // for implementations with asynchronous transfer capabilities). It's also // not guaranteed that all GPUs support this. pl_timer timer; // An optional callback to fire after the operation completes. If this is // specified, then the operation is performed asynchronously. Note that // transfers to/from buffers are always asynchronous, even without, this // field, so it's more useful for `ptr` transfers. (Though it can still be // helpful to avoid having to manually poll buffers all the time) // // When this is *not* specified, uploads from `ptr` are still asynchronous // but require a host memcpy, while downloads from `ptr` are blocking. As // such, it's recommended to always try using asynchronous texture // transfers wherever possible. 
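//
// A rough sketch of such an asynchronous upload (`on_upload_done` and
// `frame` are hypothetical user code, not part of libplacebo):
//
//     pl_tex_upload(gpu, pl_tex_transfer_params(
//         .tex      = tex,
//         .ptr      = frame->pixels,
//         .callback = on_upload_done,
//         .priv     = frame,
//     ));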
// // Note: Requires `pl_gpu_limits.callbacks` // // Note: Callbacks are implicitly synchronized, meaning that callbacks are // guaranteed to never execute concurrently with other callbacks. However, // they may execute from any thread that the `pl_gpu` is used on. void (*callback)(void *priv); void *priv; // arbitrary user data // For the data source/target of a transfer operation, there are two valid // options: // // 1. Transferring to/from a buffer: (requires `pl_gpu_limits.buf_transfer`) pl_buf buf; // buffer to use size_t buf_offset; // offset of data within buffer, should be a // multiple of `tex->params.format->texel_size` // 2. Transferring to/from host memory directly: void *ptr; // address of data // Note: The contents of the memory region / buffer must exactly match the // texture format; i.e. there is no explicit conversion between formats. }; #define pl_tex_transfer_params(...) (&(struct pl_tex_transfer_params) { __VA_ARGS__ }) // Upload data to a texture. Returns whether successful. bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params); // Download data from a texture. Returns whether successful. bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params); // Returns whether or not a texture is currently "in use". This can either be // because of a pending read operation, a pending write operation or a pending // texture export operation. Note that this function's usefulness is extremely // limited under ordinary circumstances. In practically all cases, textures do // not need to be directly synchronized by the user, except when interfacing // with external libraries. This function should NOT, however, be used as a // crutch to avoid having to implement semaphore-based synchronization. See // `pl_sync` for a better replacement for external API interop. // // A good example of a use case in which this function is required is when // interoperating with external memory management that needs to know when an // imported texture is safe to free / reclaim internally, in which case // semaphores are insufficient because memory management is a host operation. // // The `timeout`, specified in nanoseconds, indicates how long to block for // before returning. If set to 0, this function will never block, and only // returns the current status of the texture. The actual precision of the // timeout may be significantly longer than one nanosecond, and has no upper // bound. This function does not provide hard latency guarantees. This function // may also return at any time, even if the texture is still in use. If the // user wishes to block until the texture is definitely no longer in use, the // recommended usage is: // // while (pl_tex_poll(gpu, buf, UINT64_MAX)) // ; // do nothing // // Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly // synchronized, meaning it can safely be called on a `pl_tex` that is in use // by another thread. bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout); // Data type of a shader input variable (e.g. uniform, or UBO member) enum pl_var_type { PL_VAR_INVALID = 0, PL_VAR_SINT, // C: int GLSL: int/ivec PL_VAR_UINT, // C: unsigned int GLSL: uint/uvec PL_VAR_FLOAT, // C: float GLSL: float/vec/mat PL_VAR_TYPE_COUNT }; // Returns the host size (in bytes) of a pl_var_type. size_t pl_var_type_size(enum pl_var_type type); // Represents a shader input variable (concrete data, e.g. 
vector, matrix) struct pl_var { const char *name; // name as used in the shader enum pl_var_type type; // The total number of values is given by dim_v * dim_m. For example, a // vec2 would have dim_v = 2 and dim_m = 1. A mat3x4 would have dim_v = 4 // and dim_m = 3. int dim_v; // vector dimension int dim_m; // matrix dimension (number of columns, see below) int dim_a; // array dimension }; // Helper functions for constructing the most common pl_vars, with names // corresponding to their corresponding GLSL built-in types. struct pl_var pl_var_float(const char *name); struct pl_var pl_var_vec2(const char *name); struct pl_var pl_var_vec3(const char *name); struct pl_var pl_var_vec4(const char *name); struct pl_var pl_var_mat2(const char *name); struct pl_var pl_var_mat2x3(const char *name); struct pl_var pl_var_mat2x4(const char *name); struct pl_var pl_var_mat3(const char *name); struct pl_var pl_var_mat3x4(const char *name); struct pl_var pl_var_mat4x2(const char *name); struct pl_var pl_var_mat4x3(const char *name); struct pl_var pl_var_mat4(const char *name); struct pl_var pl_var_int(const char *name); struct pl_var pl_var_ivec2(const char *name); struct pl_var pl_var_ivec3(const char *name); struct pl_var pl_var_ivec4(const char *name); struct pl_var pl_var_uint(const char *name); struct pl_var pl_var_uvec2(const char *name); struct pl_var pl_var_uvec3(const char *name); struct pl_var pl_var_uvec4(const char *name); struct pl_named_var { const char *glsl_name; struct pl_var var; }; // The same list as above, tagged by name and terminated with a {0} entry. extern const struct pl_named_var pl_var_glsl_types[]; // Efficient helper function for performing a lookup in the above array. // Returns NULL if the variable is not legal. Note that the array dimension is // ignored, since it's usually part of the variable name and not the type name. const char *pl_var_glsl_type_name(struct pl_var var); // Converts a pl_fmt to an "equivalent" pl_var. Equivalent in this sense means // that the pl_var's type will be the same as the vertex's sampled type (e.g. // PL_FMT_UNORM gets turned into PL_VAR_FLOAT). struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name); // Describes the memory layout of a variable, relative to some starting location // (typically the offset within a uniform/storage/pushconstant buffer) // // Note on matrices: All GPUs expect column major matrices, for both buffers and // input variables. Care needs to be taken to avoid trying to use e.g. a // pl_matrix3x3 (which is row major) directly as a pl_var_update.data! // // In terms of the host layout, a column-major matrix (e.g. matCxR) with C // columns and R rows is treated like an array vecR[C]. The `stride` here refers // to the separation between these array elements, i.e. the separation between // the individual columns. // // Visualization of a mat4x3: // // 0 1 2 3 <- columns // 0 [ (A) (D) (G) (J) ] // 1 [ (B) (E) (H) (K) ] // 2 [ (C) (F) (I) (L) ] // ^ rows // // Layout in GPU memory: (stride=16, size=60) // // [ A B C ] X <- column 0, offset +0 // [ D E F ] X <- column 1, offset +16 // [ G H I ] X <- column 2, offset +32 // [ J K L ] <- column 3, offset +48 // // Note the lack of padding on the last column in this example. // In general: size <= stride * dim_m // // C representation: (stride=12, size=48) // // { { A, B, C }, // { D, E, F }, // { G, H, I }, // { J, K, L } } // // Note on arrays: `stride` represents both the stride between elements of a // matrix, and the stride between elements of an array. 
That is, there is no // distinction between the columns of a matrix and the rows of an array. For // example, a mat2[10] and a vec2[20] share the same pl_var_layout - the stride // would be sizeof(vec2) and the size would be sizeof(vec2) * 2 * 10. // // For non-array/matrix types, `stride` is equal to `size`. struct pl_var_layout { size_t offset; // the starting offset of the first byte size_t stride; // the delta between two elements of an array/matrix size_t size; // the total size of the input }; // Returns the host layout of an input variable as required for a // tightly-packed, byte-aligned C data type, given a starting offset. struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var); // Returns the GLSL std140 layout of an input variable given a current buffer // offset, as required for a buffer descriptor of type PL_DESC_BUF_UNIFORM // // The normal way to use this function is when calculating the size and offset // requirements of a uniform buffer in an incremental fashion, to calculate the // new offset of the next variable in this buffer. struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var); // Returns the GLSL std430 layout of an input variable given a current buffer // offset, as required for a buffer descriptor of type PL_DESC_BUF_STORAGE, and // for push constants. struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var); // Convenience definitions / friendly names for these #define pl_buf_uniform_layout pl_std140_layout #define pl_buf_storage_layout pl_std430_layout #define pl_push_constant_layout pl_std430_layout // Like memcpy, but copies bytes from `src` to `dst` in a manner governed by // the stride and size of `dst_layout` as well as `src_layout`. Also takes // into account the respective `offset`. void memcpy_layout(void *dst, struct pl_var_layout dst_layout, const void *src, struct pl_var_layout src_layout); // Represents a compile-time constant. struct pl_constant { enum pl_var_type type; // constant data type uint32_t id; // GLSL `constant_id` size_t offset; // byte offset in `constant_data` }; // Represents a vertex attribute. struct pl_vertex_attrib { const char *name; // name as used in the shader pl_fmt fmt; // data format (must have PL_FMT_CAP_VERTEX) size_t offset; // byte offset into the vertex struct int location; // vertex location (as used in the shader) }; // Returns an abstract namespace index for a given descriptor type. This will // always be a value >= 0 and < PL_DESC_TYPE_COUNT. Implementations can use // this to figure out which descriptors may share the same value of `binding`. // Bindings must only be unique for all descriptors within the same namespace. int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type); // Access mode of a shader input descriptor. enum pl_desc_access { PL_DESC_ACCESS_READWRITE, PL_DESC_ACCESS_READONLY, PL_DESC_ACCESS_WRITEONLY, PL_DESC_ACCESS_COUNT, }; // Returns the GLSL syntax for a given access mode (e.g. "readonly"). const char *pl_desc_access_glsl_name(enum pl_desc_access mode); // Represents a shader descriptor (e.g. texture or buffer binding) struct pl_desc { const char *name; // name as used in the shader enum pl_desc_type type; // The binding of this descriptor, as used in the shader. All bindings // within a namespace must be unique. (see: pl_desc_namespace) int binding; // For storage images and storage buffers, this can be used to restrict // the type of access that may be performed on the descriptor. 
Ignored for // the other descriptor types (uniform buffers and sampled textures are // always read-only). enum pl_desc_access access; }; // Framebuffer blending mode (for raster passes) enum pl_blend_mode { PL_BLEND_ZERO, PL_BLEND_ONE, PL_BLEND_SRC_ALPHA, PL_BLEND_ONE_MINUS_SRC_ALPHA, PL_BLEND_MODE_COUNT, }; struct pl_blend_params { enum pl_blend_mode src_rgb; enum pl_blend_mode dst_rgb; enum pl_blend_mode src_alpha; enum pl_blend_mode dst_alpha; }; #define pl_blend_params(...) (&(struct pl_blend_params) { __VA_ARGS__ }) // Typical alpha compositing extern const struct pl_blend_params pl_alpha_overlay; enum pl_prim_type { PL_PRIM_TRIANGLE_LIST, PL_PRIM_TRIANGLE_STRIP, PL_PRIM_TYPE_COUNT, }; enum pl_index_format { PL_INDEX_UINT16 = 0, PL_INDEX_UINT32, PL_INDEX_FORMAT_COUNT, }; enum pl_pass_type { PL_PASS_INVALID = 0, PL_PASS_RASTER, // vertex+fragment shader PL_PASS_COMPUTE, // compute shader (requires `pl_gpu.glsl.compute`) PL_PASS_TYPE_COUNT, }; // Description of a rendering pass. It conflates the following: // - GLSL shader(s) and its list of inputs // - target parameters (for raster passes) struct pl_pass_params { enum pl_pass_type type; // Input variables. struct pl_var *variables; int num_variables; // Input descriptors. struct pl_desc *descriptors; int num_descriptors; // Compile-time specialization constants. struct pl_constant *constants; int num_constants; // Initial data for the specialization constants. Optional. If NULL, // specialization constants receive the values from the shader text. void *constant_data; // Push constant region. Must be be a multiple of 4 <= limits.max_pushc_size size_t push_constants_size; // The shader text in GLSL. For PL_PASS_RASTER, this is interpreted // as a fragment shader. For PL_PASS_COMPUTE, this is interpreted as // a compute shader. const char *glsl_shader; // Highly implementation-specific byte array storing a compiled version of // the same shader. Can be used to speed up pass creation on already // known/cached shaders. // // Note: There are a few restrictions on this. Passing an out-of-date // cache, passing a cache corresponding to a different program, or passing // a cache belonging to a different GPU, are all guaranteed to be valid. // // It is, however, undefined behavior to pass arbitrary or maliciously // crafted bytes - and users are advised that attaching a shader cache // obtained from the internet could lead to arbitrary program behavior // (possibly including code execution). const uint8_t *cached_program; size_t cached_program_len; // --- type==PL_PASS_RASTER only // Describes the interpretation and layout of the vertex data. enum pl_prim_type vertex_type; struct pl_vertex_attrib *vertex_attribs; int num_vertex_attribs; size_t vertex_stride; // must be a multiple of limits.align_vertex_stride // The vertex shader itself. const char *vertex_shader; // Target format. The format must support PL_FMT_CAP_RENDERABLE. The // resulting pass may only be used on textures that have a format with a // `pl_fmt.signature` compatible to this format. pl_fmt target_format; // Target blending mode. If this is NULL, blending is disabled. Otherwise, // the `target_format` must also support PL_FMT_CAP_BLENDABLE. const struct pl_blend_params *blend_params; // If false, the target's existing contents will be discarded before the // pass is run. (Semantically equivalent to calling pl_tex_invalidate // before every pl_pass_run, but slightly more efficient) // // Specifying `blend_params` requires `load_target` to be true. 
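// (Rationale: blending reads back the existing contents of the target as the
// destination operand, so they cannot be discarded beforehand.)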
bool load_target; // (Deprecated) Fallback for `target_format`. PL_STRUCT(pl_tex) target_dummy PL_DEPRECATED; }; #define pl_pass_params(...) (&(struct pl_pass_params) { __VA_ARGS__ }) // Conflates the following typical GPU API concepts: // - various kinds of shaders // - rendering pipelines // - descriptor sets, uniforms, other bindings // - all synchronization necessary // - the current values of all inputs // // Thread-safety: Unsafe typedef const PL_STRUCT(pl_pass) { struct pl_pass_params params; } *pl_pass; // Compile a shader and create a render pass. This is a rare/expensive // operation and may take a significant amount of time, even if a cached // program is used. Returns NULL on failure. // // The resulting pl_pass->params.cached_program will be initialized by // this function to point to a new, valid cached program (if any). pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params); void pl_pass_destroy(pl_gpu gpu, pl_pass *pass); struct pl_desc_binding { const void *object; // pl_* object with type corresponding to pl_desc_type // For PL_DESC_SAMPLED_TEX, this can be used to configure the sampler. enum pl_tex_address_mode address_mode; enum pl_tex_sample_mode sample_mode; }; struct pl_var_update { int index; // index into params.variables[] const void *data; // pointer to raw byte data corresponding to pl_var_host_layout() }; struct pl_pass_run_params { pl_pass pass; // If present, the shader will be re-specialized with the new constants // provided. This is a significantly cheaper operation than recompiling a // brand new shader, but should still be avoided if possible. // // Leaving it as NULL re-uses the existing specialization values. Ignored // if the shader has no specialization constants. Guaranteed to be a no-op // if the values have not changed since the last invocation. void *constant_data; // This list only contains descriptors/variables which have changed // since the previous invocation. All non-mentioned variables implicitly // preserve their state from the last invocation. struct pl_var_update *var_updates; int num_var_updates; // This list contains all descriptors used by this pass. It must // always be filled, even if the descriptors haven't changed. The order // must match that of pass->params.descriptors struct pl_desc_binding *desc_bindings; // The push constants for this invocation. This must always be set and // fully defined for every invocation if params.push_constants_size > 0. void *push_constants; // An optional timer to report the approximate runtime of this shader pass // invocation to. Note that this is only an approximation, since shaders // may overlap their execution times and contend for GPU time. pl_timer timer; // --- pass->params.type==PL_PASS_RASTER only // Target must be a 2D texture, `target->params.renderable` must be true, // and `target->params.format->signature` must match the signature provided // in `pass->params.target_format`. // // If the viewport or scissors are left blank, they are inferred from // target->params. // // WARNING: Rendering to a *target that is being read from by the same // shader is undefined behavior. In general, trying to bind the same // resource multiple times to the same shader is undefined behavior. pl_tex target; struct pl_rect2d viewport; // screen space viewport (must be normalized) struct pl_rect2d scissors; // target render scissors (must be normalized) // Number of vertices to render int vertex_count; // Vertex data may be provided in one of two forms: // // 1. 
Drawing from host memory directly const void *vertex_data; // 2. Drawing from a vertex buffer (requires `vertex_buf->params.drawable`) pl_buf vertex_buf; size_t buf_offset; // (Optional) Index data may be provided in the form given by `index_fmt`. // These will be used for instanced rendering. Similar to vertex data, this // can be provided in two forms: // 1. From host memory const void *index_data; enum pl_index_format index_fmt; // 2. From an index buffer (requires `index_buf->params.drawable`) pl_buf index_buf; size_t index_offset; // Note: Drawing from an index buffer requires vertex data to also be // present in buffer form, i.e. it's forbidden to mix `index_buf` with // `vertex_data` (though vice versa is allowed). // --- pass->params.type==PL_PASS_COMPUTE only // Number of work groups to dispatch per dimension (X/Y/Z). Must be <= the // corresponding index of limits.max_dispatch int compute_groups[3]; }; #define pl_pass_run_params(...) (&(struct pl_pass_run_params) { __VA_ARGS__ }) // Execute a render pass. void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params); // A generic synchronization object intended for use with an external API. This // is not required when solely using libplacebo API functions, as all required // synchronisation is done internally. This comes in the form of a pair of // semaphores - one to synchronize access in each direction. // // Thread-safety: Unsafe typedef const PL_STRUCT(pl_sync) { enum pl_handle_type handle_type; // This handle is signalled by the `pl_gpu`, and waited on by the user. It // fires when it is safe for the user to access the shared resource. union pl_handle wait_handle; // This handle is signalled by the user, and waited on by the `pl_gpu`. It // must fire when the user has finished accessing the shared resource. union pl_handle signal_handle; } *pl_sync; // Create a synchronization object. Returns NULL on failure. // // `handle_type` must be exactly *one* of `pl_gpu.export_caps.sync`, and // indicates which type of handle to generate for sharing this sync object. pl_sync pl_sync_create(pl_gpu gpu, enum pl_handle_type handle_type); // Destroy a `pl_sync`. Note that this invalidates the externally imported // semaphores. Users should therefore make sure that all operations that // wait on or signal any of the semaphore have been fully submitted and // processed by the external API before destroying the `pl_sync`. // // Despite this, it's safe to destroy a `pl_sync` if the only pending // operations that involve it are internal to libplacebo. void pl_sync_destroy(pl_gpu gpu, pl_sync *sync); // Initiates a texture export operation, allowing a texture to be accessed by // an external API. Returns whether successful. After this operation // successfully returns, it is guaranteed that `sync->wait_handle` will // eventually be signalled. For APIs where this is relevant, the image layout // should be specified as "general", e.g. `GL_LAYOUT_GENERAL_EXT` for OpenGL. // // There is no corresponding "import" operation - the next operation that uses // a texture will implicitly import the texture. Valid API usage requires that // the user *must* submit a semaphore signal operation on `sync->signal_handle` // before doing so. Not doing so is undefined behavior and may very well // deadlock the calling process and/or the graphics card! // // Note that despite this restriction, it is always valid to call // `pl_tex_destroy`, even if the texture is in an exported state, without // having to signal the corresponding sync object first. 
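// As an illustrative sketch (assuming a `gpu` that supports exporting
// PL_HANDLE_FD handles, and hypothetical external-API helpers for importing
// them), the export flow might look roughly like this:
//
//   pl_sync sync = pl_sync_create(gpu, PL_HANDLE_FD);
//   if (sync && pl_tex_export(gpu, tex, sync)) {
//       // hand `sync->wait_handle` / `sync->signal_handle` and the texture
//       // to the external API; wait on the former before reading, and signal
//       // the latter before libplacebo touches `tex` again
//   }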
bool pl_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync); // This is semantically a no-op, but it provides a hint that you want to flush // any partially queued up commands and begin execution. There is normally no // need to call this, because queued commands will always be implicitly flushed // whenever necessary to make forward progress on commands like `pl_buf_poll`, // or when submitting a frame to a swapchain for display. In fact, calling this // function can negatively impact performance, because some GPUs rely on being // able to re-order and modify queued commands in order to enable optimizations // retroactively. // // The only time this might be beneficial to call explicitly is if you're doing // lots of offline processing, i.e. you aren't rendering to a swapchain but to // textures that you download from again. In that case you should call this // function after each "work item" to ensure good parallelism between them. // // It's worth noting that this function may block if you're over-feeding the // GPU without waiting for existing results to finish. void pl_gpu_flush(pl_gpu gpu); // This is like `pl_gpu_flush` but also blocks until the GPU is fully idle // before returning. Using this in your rendering loop is seriously disadvised, // and almost never the right solution. The intended use case is for deinit // logic, where users may want to force all pending GPU operations to // finish so they can clean up their state more easily. // // After this operation is called, it's guaranteed that all pending buffer // operations are complete - i.e. `pl_buf_poll` is guaranteed to return false. // It's also guaranteed that any outstanding timer query results are available. // // Note: If you only care about buffer operations, you can accomplish this more // easily by using `pl_buf_poll` with the timeout set to `UINT64_MAX`. But if // you have many buffers it may be more convenient to call this function // instead. The difference is that this function will also affect e.g. renders // to a `pl_swapchain`. void pl_gpu_finish(pl_gpu gpu); // Returns true if the GPU is considered to be in a "failed" state, which // during normal operation is typically the result of things like the device // being lost (due to e.g. power management). // // If this returns true, users *should* destroy and recreate the `pl_gpu`, // including all associated resources, via the appropriate mechanism. bool pl_gpu_is_failed(pl_gpu gpu); PL_API_END #endif // LIBPLACEBO_GPU_H_ libplacebo-v4.192.1/src/include/libplacebo/log.h000066400000000000000000000114601417677245700214400ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_LOG_H_ #define LIBPLACEBO_LOG_H_ #include #include PL_API_BEGIN // The log level associated with a given log message.
enum pl_log_level { PL_LOG_NONE = 0, PL_LOG_FATAL, // results in total loss of function of a major component PL_LOG_ERR, // serious error; may result in degraded function PL_LOG_WARN, // warning; potentially bad, probably user-relevant PL_LOG_INFO, // informational message, also potentially harmless errors PL_LOG_DEBUG, // verbose debug message, informational PL_LOG_TRACE, // very noisy trace of activity, usually benign PL_LOG_ALL = PL_LOG_TRACE, }; struct pl_log_params { // Logging callback. All messages, informational or otherwise, will get // redirected to this callback. The logged messages do not include trailing // newlines. Optional. void (*log_cb)(void *log_priv, enum pl_log_level level, const char *msg); void *log_priv; // The current log level. Controls the level of message that will be // redirected to the log callback. Setting this to PL_LOG_ALL means all // messages will be forwarded, but doing so indiscriminately can result // in increased CPU usage as it may enable extra debug paths based on the // configured log level. enum pl_log_level log_level; }; #define pl_log_params(...) (&(struct pl_log_params) { __VA_ARGS__ }) extern const struct pl_log_params pl_log_default_params; // Thread-safety: Safe // // Note: In any context in which `pl_log` is used, users may also pass NULL // to disable logging. In other words, NULL is a valid `pl_log`. typedef const PL_STRUCT(pl_log) { struct pl_log_params params; } *pl_log; // Creates a pl_log. For historical reasons, the argument `api_ver` must be // given as PL_API_VER. `params` defaults to `&pl_log_default_params` if left // as NULL. // // Note: As a general rule, any `params` struct used as an argument to a // function need only live until the corresponding function returns. pl_log pl_log_create(int api_ver, const struct pl_log_params *params); // Destroy a `pl_log` object. // // Note: As a general rule, all `_destroy` functions take the pointer to the // object to free as their parameter. This pointer is overwritten by NULL // afterwards. Calling a _destroy function on &{NULL} is valid, but calling it // on NULL itself is invalid. void pl_log_destroy(pl_log *log); // Update the parameters of a `pl_log` without destroying it. This can be // used to change the log function, log context or log level retroactively. // `params` defaults to `&pl_log_default_params` if left as NULL. // // Returns the previous params, atomically. struct pl_log_params pl_log_update(pl_log log, const struct pl_log_params *params); // Like `pl_log_update` but only updates the log level, leaving the log // callback intact. // // Returns the previous log level, atomically. enum pl_log_level pl_log_level_update(pl_log log, enum pl_log_level level); // Two simple, stream-based loggers. You can use these as the log_cb. If you // also set log_priv to a FILE* (e.g. stdout or stderr) it will be printed // there; otherwise, it will be printed to stdout or stderr depending on the // log level. // // The version with colors will use ANSI escape sequences to indicate the log // level. The version without will use explicit prefixes.
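// As a usage sketch (illustrative only), a log that prints colored messages
// of level PL_LOG_INFO and above to stderr could be created with:
//
//   pl_log log = pl_log_create(PL_API_VER, pl_log_params(
//       .log_cb    = pl_log_color,
//       .log_priv  = stderr,          // optional FILE *, see above
//       .log_level = PL_LOG_INFO,
//   ));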
void pl_log_simple(void *stream, enum pl_log_level level, const char *msg); void pl_log_color(void *stream, enum pl_log_level level, const char *msg); // Backwards compatibility with older versions of libplacebo #define pl_context pl_log #define pl_context_params pl_log_params static inline PL_DEPRECATED PL_STRUCT(pl_context) * pl_context_create(int api_ver, const struct pl_context_params *params) { return (PL_STRUCT(pl_context) *) pl_log_create(api_ver, params); } static inline PL_DEPRECATED void pl_context_destroy(PL_STRUCT(pl_context) **pctx) { pl_log_destroy((pl_log *) pctx); } static inline PL_DEPRECATED void pl_context_update(PL_STRUCT(pl_context) *ctx, const struct pl_context_params *params) { pl_log_update((pl_log) ctx, params); } PL_API_END #endif // LIBPLACEBO_LOG_H_ libplacebo-v4.192.1/src/include/libplacebo/meson.build000066400000000000000000000002241417677245700226440ustar00rootroot00000000000000configure_file( input: 'config.h.in', output: 'config.h', install_dir: get_option('includedir') / proj_name, configuration: conf_public, ) libplacebo-v4.192.1/src/include/libplacebo/opengl.h000066400000000000000000000211361417677245700221440ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_OPENGL_H_ #define LIBPLACEBO_OPENGL_H_ #include #include PL_API_BEGIN // Note on thread safety: The thread safety of `pl_opengl` and any associated // GPU objects follows the same thread safety rules as the underlying OpenGL // context. In other words, they must only be called from the thread the OpenGL // context is current on. typedef const PL_STRUCT(pl_opengl) { pl_gpu gpu; } *pl_opengl; struct pl_opengl_params { // Enable OpenGL debug report callbacks. May have little effect depending // on whether or not the GL context was initialized with appropriate // debugging enabled. bool debug; // Allow the use of (suspected) software rasterizers and renderers. These // can be useful for debugging purposes, but normally, their use is // undesirable when GPU-accelerated processing is expected. bool allow_software; // Restrict the maximum allowed GLSL version. (Mainly for testing) int max_glsl_version; // Optional. Required when importing/exporting dmabufs as textures. void *egl_display; void *egl_context; // Optional callbacks to bind/release the OpenGL context on the current // thread. If these are specified, then the resulting `pl_gpu` will have // `pl_gpu_limits.thread_safe` enabled, and may therefore be used from any // thread without first needing to bind the OpenGL context. // // If the user is re-using the same OpenGL context in non-libplacebo code, // then these callbacks should include whatever synchronization is // necessary to prevent simultaneous use between libplacebo and the user. bool (*make_current)(void *priv); void (*release_current)(void *priv); void *priv; }; // Default/recommended parameters #define pl_opengl_params(...) 
(&(struct pl_opengl_params) { __VA_ARGS__ }) extern const struct pl_opengl_params pl_opengl_default_params; // Creates a new OpenGL renderer based on the given parameters. This will // internally use whatever platform-defined mechanism (WGL, X11, EGL) is // appropriate for loading the OpenGL function calls, so the user doesn't need // to pass in a `getProcAddress` callback. If `params` is left as NULL, it // defaults to `&pl_opengl_default_params`. The context must be active when // calling this function, and must remain active whenever calling any // libplacebo function on the resulting `pl_opengl` or `pl_gpu`. // // Note that creating multiple `pl_opengl` instances from the same OpenGL // context is undefined behavior. pl_opengl pl_opengl_create(pl_log log, const struct pl_opengl_params *params); // All resources allocated from the `pl_gpu` contained by this `pl_opengl` must // be explicitly destroyed by the user before calling `pl_opengl_destroy`. void pl_opengl_destroy(pl_opengl *gl); // For a `pl_gpu` backed by `pl_opengl`, this function can be used to retrieve // the underlying `pl_opengl`. Returns NULL for any other type of `gpu`. pl_opengl pl_opengl_get(pl_gpu gpu); struct pl_opengl_framebuffer { // ID of the framebuffer, or 0 to use the context's default framebuffer. int id; // If true, then the framebuffer is assumed to be "flipped" relative to // normal GL semantics, i.e. set this to `true` if the first pixel is the // top left corner. bool flipped; }; struct pl_opengl_swapchain_params { // Set this to the platform-specific function to swap buffers, e.g. // glXSwapBuffers, eglSwapBuffers etc. This will be called internally by // `pl_swapchain_swap_buffers`. Required, unless you never call that // function. void (*swap_buffers)(void *priv); // Initial framebuffer description. This can be changed later on using // `pl_opengl_swapchain_update_fb`. struct pl_opengl_framebuffer framebuffer; // Attempt forcing a specific latency. If this is nonzero, then // `pl_swapchain_swap_buffers` will wait until fewer than N frames are "in // flight" before returning. Setting this to a high number generally // accomplishes nothing, because the OpenGL driver typically limits the // number of buffers on its own. But setting it to a low number like 2 or // even 1 can reduce latency (at the cost of throughput). int max_swapchain_depth; // Arbitrary user pointer that gets passed to `swap_buffers` etc. void *priv; }; #define pl_opengl_swapchain_params(...) (&(struct pl_opengl_swapchain_params) { __VA_ARGS__ }) // Creates an instance of `pl_swapchain` tied to the active context. // Note: Due to OpenGL semantics, users *must* call `pl_swapchain_resize` // before attempting to use this swapchain, otherwise calls to // `pl_swapchain_start_frame` will fail. pl_swapchain pl_opengl_create_swapchain(pl_opengl gl, const struct pl_opengl_swapchain_params *params); // Update the framebuffer description. After calling this function, users // *must* call `pl_swapchain_resize` before attempting to use the swapchain // again, otherwise calls to `pl_swapchain_start_frame` will fail. void pl_opengl_swapchain_update_fb(pl_swapchain sw, const struct pl_opengl_framebuffer *fb); struct pl_opengl_wrap_params { // The GLuint texture object itself. Optional. If no texture is provided, // then only the opaque framebuffer `fbo` will be wrapped, leaving the // resulting `pl_tex` object with some operations (such as sampling) being // unsupported. unsigned int texture; // The GLuint associated framebuffer. Optional.
If this is not specified, // then libplacebo will attempt creating a framebuffer from the provided // texture object (if possible). // // Note: As a special case, if neither a texture nor an FBO are provided, // this is equivalent to wrapping the OpenGL default framebuffer (id 0). unsigned int framebuffer; // The image's dimensions (unused dimensions must be 0) int width; int height; int depth; // Texture-specific fields: // // Note: These are only relevant if `texture` is provided. // The GLenum for the texture target to use, e.g. GL_TEXTURE_2D. Optional. // If this is left as 0, the target is inferred from the number of // dimensions. Users may want to set this to something specific like // GL_TEXTURE_EXTERNAL_OES depending on the nature of the texture. unsigned int target; // The texture's GLint sized internal format (e.g. GL_RGBA16F). Required. int iformat; }; #define pl_opengl_wrap_params(...) (&(struct pl_opengl_wrap_params) { __VA_ARGS__ }) // Wraps an external OpenGL object into a `pl_tex` abstraction. Due to the // internally synchronized nature of OpenGL, no explicit synchronization // is needed between libplacebo `pl_tex_` operations, and host accesses to // the texture. Wrapping the same OpenGL texture multiple times is permitted. // Note that this function transfers no ownership. // // This wrapper can be destroyed by simply calling `pl_tex_destroy` on it, // which will *not* destroy the user-provided OpenGL texture or framebuffer. // // This function may fail, in which case it returns NULL. pl_tex pl_opengl_wrap(pl_gpu gpu, const struct pl_opengl_wrap_params *params); // Analogous to `pl_opengl_wrap`, this function takes any `pl_tex` (including // ones created by `pl_tex_create`) and unwraps it to expose the underlying // OpenGL texture to the user. Note that this function transfers no ownership, // i.e. the texture object and framebuffer shall not be destroyed by the user. // // Returns the OpenGL texture. `out_target` and `out_iformat` will be updated // to hold the target type and internal format, respectively. (Optional) // // For renderable/blittable textures, `out_fbo` will be updated to the ID of // the framebuffer attached to this texture, or 0 if there is none. (Optional) unsigned int pl_opengl_unwrap(pl_gpu gpu, pl_tex tex, unsigned int *out_target, int *out_iformat, unsigned int *out_fbo); PL_API_END #endif // LIBPLACEBO_OPENGL_H_ libplacebo-v4.192.1/src/include/libplacebo/renderer.h000066400000000000000000000777241417677245700225040ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_RENDERER_H_ #define LIBPLACEBO_RENDERER_H_ #include #include #include #include #include #include #include #include #include #include PL_API_BEGIN // Thread-safety: Unsafe typedef PL_STRUCT(pl_renderer) *pl_renderer; // Creates a new renderer object, which is backed by a GPU context. 
This is a // high-level object that takes care of the rendering chain as a whole, from // the source textures to the finished frame. pl_renderer pl_renderer_create(pl_log log, pl_gpu gpu); void pl_renderer_destroy(pl_renderer *rr); // Saves the internal shader cache of this renderer into an abstract cache // object that can be saved to disk and later re-loaded to speed up // recompilation of shaders. See `pl_dispatch_save` for more information. size_t pl_renderer_save(pl_renderer rr, uint8_t *out_cache); // Load the result of a previous `pl_renderer_save` call. See // `pl_dispatch_load` for more information. // // Note: See the security warnings on `pl_pass_params.cached_program`. void pl_renderer_load(pl_renderer rr, const uint8_t *cache); enum pl_lut_type { PL_LUT_UNKNOWN = 0, PL_LUT_NATIVE, // applied to raw image contents (after fixing bit depth) PL_LUT_NORMALIZED, // applied to normalized RGB values PL_LUT_CONVERSION, // LUT fully replaces color conversion // Note: When using a PL_LUT_CONVERSION to replace the YUV->RGB conversion, // `pl_render_params.color_adjustment` is no longer applied. Similarly, // when using a PL_LUT_CONVERSION to replace the image->target color space // conversion, `pl_render_params.color_map_params` are ignored. // // Note: For LUTs attached to the output frame, PL_LUT_CONVERSION should // instead perform the inverse (RGB->native) conversion. // // Note: PL_LUT_UNKNOWN tries inferring the meaning of the LUT from the // LUT's tagged metadata, and otherwise falls back to PL_LUT_NATIVE. }; enum pl_render_stage { PL_RENDER_STAGE_FRAME, // full frame redraws, for fresh/uncached frames PL_RENDER_STAGE_BLEND, // the output blend pass (only for pl_render_image_mix) PL_RENDER_STAGE_COUNT, }; struct pl_render_info { const struct pl_dispatch_info *pass; // information about the shader enum pl_render_stage stage; // the associated render stage // For PL_RENDER_STAGE_FRAME, this specifies the chronological index // of this pass within the frame (starting at `index == 0`). // // For PL_RENDER_STAGE_BLEND, this specifies the number of frames // being blended (since that results in a different shader). int index; }; // Represents the options used for rendering. These affect the quality of // the result. struct pl_render_params { // Configures the algorithms used for upscaling and downscaling, // respectively. If left as NULL, then libplacebo will only use inexpensive // sampling (bilinear or nearest neighbour depending on the capabilities // of the hardware / texture). // // Note: Setting `downscaler` to NULL also implies `skip_anti_aliasing`, // since the built-in GPU sampling algorithms can't anti-alias. // // Note: If set to the same address as the built-in `pl_filter_bicubic`, // `pl_filter_nearest` etc.; libplacebo will also use the more efficient // direct sampling algorithm where possible without quality loss. const struct pl_filter_config *upscaler; const struct pl_filter_config *downscaler; // The number of entries for the scaler LUTs. Defaults to 64 if left unset. int lut_entries; // The anti-ringing strength to apply to non-polar filters. See the // equivalent option in `pl_sample_filter_params` for more information. float antiringing_strength; // Configures the algorithm used for frame mixing (when using // `pl_render_image_mix`). Ignored otherwise. As a special requirement, // this must be a filter config with `polar` set to false, since it's only // used for 1D mixing and thus only 1D filters are compatible.
// // If set to NULL, frame mixing is disabled, in which case // `pl_render_image_mix` will use nearest-neighbour semantics. (Note that // this still goes through the redraw cache, unless you also enable // `skip_caching_single_frame`) const struct pl_filter_config *frame_mixer; // Configures the settings used to deband source textures. Leaving this as // NULL disables debanding. // // Note: The `deband_params.grain` setting is automatically adjusted to // prevent blowing up on HDR sources. The user need not account for this. const struct pl_deband_params *deband_params; // Configures the settings used to sigmoidize the image before upscaling. // This is not always used. If NULL, disables sigmoidization. const struct pl_sigmoid_params *sigmoid_params; // Configures the color adjustment parameters used to decode the color. // This can be used to apply additional artistic settings such as // desaturation, etc. If NULL, defaults to &pl_color_adjustment_neutral. const struct pl_color_adjustment *color_adjustment; // Configures the settings used to detect the peak of the source content, // for HDR sources. Has no effect on SDR content. If NULL, peak detection // is disabled. const struct pl_peak_detect_params *peak_detect_params; // Configures the settings used to tone map from HDR to SDR, or from higher // gamut to standard gamut content. If NULL, defaults to // `&pl_color_map_default_params`. const struct pl_color_map_params *color_map_params; // Configures the settings used to dither to the output depth. Leaving this // as NULL disables dithering. const struct pl_dither_params *dither_params; // Configures the settings used to handle ICC profiles, if required. If // NULL, defaults to `&pl_icc_default_params`. const struct pl_icc_params *icc_params; // Configures the settings used to simulate color blindness, if desired. // If NULL, this feature is disabled. const struct pl_cone_params *cone_params; // Configures output blending. When rendering to the final target, the // framebuffer contents will be blended using this blend mode. Requires // that the target format has PL_FMT_CAP_BLENDABLE. NULL disables blending. const struct pl_blend_params *blend_params; // List of custom user shaders / hooks. // See for more information. const struct pl_hook * const *hooks; int num_hooks; // Color mapping LUT. If present, this will be applied as part of the // image being rendered, in normalized RGB space. // // Note: In this context, PL_LUT_NATIVE means "gamma light" and // PL_LUT_NORMALIZED means "linear light". For HDR signals, normalized LUTs // are scaled so 1.0 corresponds to the `pl_color_transfer_nominal_peak`. // // Note: A PL_LUT_CONVERSION fully replaces the color adaptation from // `image` to `target`, including any tone-mapping (if necessary). It has // the same representation as PL_LUT_NATIVE, so in this case the input // and output are (respectively) non-linear light RGB. const struct pl_custom_lut *lut; enum pl_lut_type lut_type; // If the image being rendered does not span the entire size of the target, // it will be cleared explicitly using this background color (RGB). To // disable this logic, set `skip_target_clearing`. float background_color[3]; float background_transparency; // 0.0 for opaque, 1.0 for fully transparent bool skip_target_clearing; // If true, then transparent images will be made opaque by painting them // against a checkerboard pattern consisting of alternating colors. If both // colors are left as {0}, they default respectively to 93% and 87% gray.
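// As an illustrative sketch (names `rr`, `image` and `target` are assumed to
// be set up by the caller), compositing a transparent image against the
// default checkerboard could be requested with:
//
//   struct pl_render_params params = pl_render_default_params;
//   params.blend_against_tiles = true;
//   pl_render_image(rr, &image, &target, &params);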
bool blend_against_tiles; float tile_colors[2][3]; int tile_size; // --- Performance / quality trade-off options: // These should generally be left off where quality is desired, as they can // degrade the result quite noticeably; but may be useful for older or // slower hardware. Note that libplacebo will automatically disable // advanced features on hardware where they are unsupported, regardless of // these settings. So only enable them if you need a performance bump. // Disables anti-aliasing on downscaling. This will result in moiré // artifacts and nasty, jagged pixels when downscaling, except for some // very limited special cases (e.g. bilinear downsampling to exactly 0.5x). // // Significantly speeds up downscaling with high downscaling ratios. bool skip_anti_aliasing; // Cutoff value for polar sampling. See the equivalent option in // `pl_sample_filter_params` for more information. float polar_cutoff; // Allows the peak detection result to be delayed by up to a single frame, // which can sometimes (not always) allow skipping some otherwise redundant // sampling work. Only relevant when peak detection is active (i.e. // params->peak_detect_params is set and the source is HDR). bool allow_delayed_peak_detect; // Normally, when the size of the `target` used with `pl_render_image_mix` // changes, or the render parameters are updated, the internal cache of // mixed frames must be discarded in order to re-render all required // frames. Setting this option to `true` will skip the cache invalidation // and instead re-use the existing frames (with bilinear scaling to the new // size if necessary), which comes at a quality loss shortly after a // resize, but should make it much more smooth. bool preserve_mixing_cache; // Normally, `pl_render_image_mix` will also push single frames through the // mixer cache, in order to speed up re-draws. Enabling this option // disables that logic, causing single frames to bypass the cache. (Though // it will still be read from, if they happen to already be cached) bool skip_caching_single_frame; // --- Performance tuning / debugging options // These may affect performance or may make debugging problems easier, // but shouldn't have any effect on the quality. // Disables linearization / sigmoidization before scaling. This might be // useful when tracking down unexpected image artifacts or excessive // ringing, but it shouldn't normally be necessary. bool disable_linear_scaling; // Forces the use of the "general" scaling algorithms even when using the // special-cased built-in presets like `pl_filter_bicubic`. Basically, this // disables the more efficient implementations in favor of the slower, // general-purpose ones. bool disable_builtin_scalers; // Forces the use of an ICC 3DLUT, even in cases where the use of one is // unnecessary. This is slower, but may improve the quality of the gamut // reduction step, if one is performed. bool force_icc_lut; // Ignore ICC profiles attached to either `image` or `target`. // Note: A LUT may still be generated if `force_icc_lut` is also enabled. bool ignore_icc_profiles; // Forces the use of dithering, even when rendering to 16-bit FBOs. This is // generally pretty pointless because most 16-bit FBOs have high enough // depth that rounding errors are below the human perception threshold, // but this can be used to test the dither code. bool force_dither; // Completely overrides the use of FBOs, as if there were no renderable // texture format available. This disables most features.
bool disable_fbos; // If this is true, all shaders will be generated as "dynamic" shaders, // with any compile-time constants being replaced by runtime-adjustable // values. This is generally a performance loss, but has the advantage of // being able to freely change parameters without triggering shader // recompilations. // // It's a good idea to enable while presenting configurable settings to the // user, but it should be set to false once those values are "dialed in". bool dynamic_constants; // This callback is invoked for every pass successfully executed in the // process of rendering a frame. Optional. // // Note: `info` is only valid until this function returns. void (*info_callback)(void *priv, const struct pl_render_info *info); void *info_priv; // --- Deprecated aliases const struct pl_icc_params *lut3d_params PL_DEPRECATED; // fallback for `icc_params` bool force_3dlut PL_DEPRECATED; // fallback for `force_icc_lut` // --- Deprecated/removed fields bool disable_overlay_sampling PL_DEPRECATED; // no longer used }; // Bare minimum parameters, with no features enabled. This is the fastest // possible configuration, and should therefore be fine on any system. #define PL_RENDER_DEFAULTS \ /* set a frame mixer for pl_render_image_mix */ \ .frame_mixer = &pl_filter_oversample, \ .color_map_params = &pl_color_map_default_params, \ .lut_entries = 64, \ .tile_colors = {{0.93, 0.93, 0.93}, \ {0.87, 0.87, 0.87}}, \ .tile_size = 32, \ .polar_cutoff = 0.001, #define pl_render_params(...) (&(struct pl_render_params) { PL_RENDER_DEFAULTS __VA_ARGS__ }) extern const struct pl_render_params pl_render_fast_params; // This contains the default/recommended options for reasonable image quality, // while also not being too terribly slow. All of the *_params structs are // defaulted to the corresponding *_default_params, except for deband_params, // and peak_detect_params, which are both disabled by default. // // This should be fine on most integrated GPUs, but if it's too slow, // consider using `pl_render_fast_params` instead. extern const struct pl_render_params pl_render_default_params; // This contains a higher quality preset for better image quality at the cost // of quite a bit of performance. In addition to the settings implied by // `pl_render_default_params`, it sets the upscaler to `pl_filter_ewa_lanczos`, // and enables debanding and peak detection. This should only really be used // with a discrete GPU and where maximum image quality is desired. extern const struct pl_render_params pl_render_high_quality_params; // Special filter config for the built-in oversampling algorithm. This is an // opaque filter with no meaningful representation, though it has one tunable // parameter controlling the threshold at which to switch back to ordinary // nearest neighbour sampling. (See `pl_shader_sample_oversample`) extern const struct pl_filter_config pl_filter_oversample; // Backwards compatibility #define pl_oversample_frame_mixer pl_filter_oversample // A list of recommended frame mixer presets, terminated by {0} extern const struct pl_filter_preset pl_frame_mixers[]; extern const int pl_num_frame_mixers; // excluding trailing {0} // A list of recommended scaler presets, terminated by {0}. This is almost // equivalent to `pl_filter_presets` with the exception of including extra // built-in filters that don't map to the `pl_filter` architecture.
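// As a rough usage sketch (illustrative only; `rr`, `img` and `out` are
// assumed to have been set up by the caller), a typical pattern is to start
// from one of the presets above and override individual fields - for example
// picking an upscaler from the preset list below, or naming one directly:
//
//   struct pl_render_params params = pl_render_default_params;
//   params.upscaler      = &pl_filter_ewa_lanczos;
//   params.deband_params = &pl_deband_default_params;
//   pl_render_image(rr, &img, &out, &params);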
extern const struct pl_filter_preset pl_scale_filters[]; extern const int pl_num_scale_filters; // excluding trailing {0} #define PL_MAX_PLANES 4 // High level description of a single slice of an image. This basically // represents a single 2D plane, with any number of components struct pl_plane { // The texture underlying this plane. The texture must be 2D, and must // have specific parameters set depending on what the plane is being used // for (see `pl_render_image`). pl_tex texture; // The preferred behaviour when sampling outside of this texture. Optional, // since the default (PL_TEX_ADDRESS_CLAMP) is very reasonable. enum pl_tex_address_mode address_mode; // Describes the number and interpretation of the components in this plane. // This defines the mapping from component index to the canonical component // order (RGBA, YCbCrA or XYZA). It's worth pointing out that this is // completely separate from `texture->format.sample_order`. The latter is // essentially irrelevant/transparent for the API user, since it just // determines which order the texture data shows up as inside the GLSL // shader; whereas this field controls the actual meaning of the component. // // Example; if the user has a plane with just {Y} and a plane with just // {Cb Cr}, and a GPU that only supports bgra formats, you would still // specify the component mapping as {0} and {1 2} respectively, even though // the GPU is sampling the data in the order BGRA. Use -1 for "ignored" // components. int components; // number of relevant components int component_mapping[4]; // semantic index of each component // Controls the sample offset, relative to the "reference" dimensions. For // an example of what to set here, see `pl_chroma_location_offset`. Note // that this is given in unit of reference pixels. For a graphical example, // imagine you have a 2x2 image with a 1x1 (subsampled) plane. Without any // shift (0.0), the situation looks like this: // // X-------X X = reference pixel // | | P = plane pixel // | P | // | | // X-------X // // For 4:2:0 subsampling, this corresponds to PL_CHROMA_CENTER. If the // shift_x was instead set to -0.5, the `P` pixel would be offset to the // left by half the separation between the reference (`X` pixels), resulting // in the following: // // X-------X X = reference pixel // | | P = plane pixel // P | // | | // X-------X // // For 4:2:0 subsampling, this corresponds to PL_CHROMA_LEFT. // // Note: It's recommended to fill this using `pl_chroma_location_offset` on // the chroma planes. float shift_x, shift_y; }; enum pl_overlay_mode { PL_OVERLAY_NORMAL = 0, // treat the texture as a normal, full-color texture PL_OVERLAY_MONOCHROME, // treat the texture as a single-component alpha map PL_OVERLAY_MODE_COUNT, }; struct pl_overlay_part { struct pl_rect2df src; // source coordinate with respect to `tex` struct pl_rect2d dst; // target coordinates with respect to the frame // If `mode` is PL_OVERLAY_MONOCHROME, then this specifies the color of // this overlay part. The color is multiplied into the sampled texture's // first channel. float color[4]; }; // A struct representing an image overlay (e.g. for subtitles or on-screen // status messages, controls, ...) struct pl_overlay { // The texture containing the backing data for overlay parts. Must have // `params.sampleable` set. pl_tex tex; // This controls the coloring mode of this overlay. enum pl_overlay_mode mode; // This controls the colorspace information for this overlay. 
The contents // of the texture / the value of `color` are interpreted according to this. struct pl_color_repr repr; struct pl_color_space color; // The number of parts for this overlay. const struct pl_overlay_part *parts; int num_parts; // (Deprecated) These fields exist for backwards compatibility. They must // not be used at the same time as `tex`. They are interpreted as an // overlay with a single part. struct pl_plane plane PL_DEPRECATED; struct pl_rect2d rect PL_DEPRECATED; // analog to `pl_overlay_part.dst` float base_color[3] PL_DEPRECATED; // analog to `pl_overlay_part.color` }; // High-level description of a complete frame, including metadata and planes struct pl_frame { // Each frame is split up into some number of planes, each of which may // carry several components and be of any size / offset. int num_planes; struct pl_plane planes[PL_MAX_PLANES]; // Color representation / encoding / semantics of this frame. struct pl_color_repr repr; struct pl_color_space color; // Optional ICC profile associated with this frame. struct pl_icc_profile profile; // Optional LUT associated with this frame. const struct pl_custom_lut *lut; enum pl_lut_type lut_type; // The logical crop / rectangle containing the valid information, relative // to the reference plane's dimensions (e.g. luma). Pixels outside of this // rectangle will ostensibly be ignored, but note that this is not a hard // guarantee. In particular, scaler filters may end up sampling outside of // this crop. This rect may be flipped, and may be partially or wholly // outside the bounds of the underlying textures. (Optional) // // Note that `pl_render_image` will map the input crop directly to the // output crop, stretching and scaling as needed. If you wish to preserve // the aspect ratio, use a dedicated function like pl_rect2df_aspect_copy. struct pl_rect2df crop; // Logical rotation of the image, with respect to the underlying planes. // For example, if this is PL_ROTATION_90, then the image will be rotated // to the right by 90° when mapping to `crop`. The actual position on-screen // is unaffected, so users should ensure that the (rotated) aspect ratio // matches the source. (Or use a helper like `pl_rect2df_aspect_set_rot`) // // Note: For `target` frames, this corresponds to a rotation of the // display, for `image` frames, this corresponds to a rotation of the // camera. // // So, as an example, target->rotation = PL_ROTATE_90 means the end user // has rotated the display to the right by 90° (meaning rendering will be // rotated 90° to the *left* to compensate), and image->rotation = // PL_ROTATE_90 means the video provider has rotated the camera to the // right by 90° (so rendering will be rotated 90° to the *right* to // compensate). pl_rotation rotation; // A list of additional overlays to render directly on top of this frame. // These overlays will be treated as though they were part of the frame // data, and can be used for things like subtitles or on-screen displays. const struct pl_overlay *overlays; int num_overlays; // Note on subsampling and plane correspondence: All planes belonging to // the same frame will only be stretched by an integer multiple (or inverse // thereof) in order to match the reference dimensions of this image. For // example, suppose you have an 8x4 image. A valid plane scaling would be // 4x2 -> 8x4 or 4x4 -> 4x4, but not 6x4 -> 8x4. So if a 6x4 plane is // given, then it would be treated like a cropped 8x4 plane (since 1.0 is // the closest scaling ratio to the actual ratio of 1.3).
// // For an explanation of why this makes sense, consider the relatively // common example of a subsampled, oddly sized (e.g. jpeg) image. In such // cases, for example a 35x23 image, the 4:2:0 subsampled chroma plane // would have to end up as 17.5x11.5, which gets rounded up to 18x12 by // implementations. So in this example, the 18x12 chroma plane would get // treated by libplacebo as an oversized chroma plane - i.e. the plane // would get sampled as if it was 17.5 pixels wide and 11.5 pixels large. // Associated film grain data (see ). // // Note: This is ignored for the `target` of `pl_render_image`, since // un-applying grain makes little sense. struct pl_film_grain_data film_grain; // Ignored by libplacebo. May be useful for users. void *user_data; }; // Helper function to infer the chroma location offset for each plane in a // frame. This is equivalent to calling `pl_chroma_location_offset` on all // subsampled planes' shift_x/shift_y variables. void pl_frame_set_chroma_location(struct pl_frame *frame, enum pl_chroma_location chroma_loc); // Fills in a `pl_frame` based on a swapchain frame's FBO and metadata. void pl_frame_from_swapchain(struct pl_frame *out_frame, const struct pl_swapchain_frame *frame); // Helper function to determine if a frame is logically cropped or not. In // particular, this is useful in determining whether or not an output frame // needs to be cleared before rendering or not. bool pl_frame_is_cropped(const struct pl_frame *frame); // Helper function to reset a frame to a given RGB color. If the frame's // color representation is something other than RGB, the clear color will // be adjusted accordingly. `clear_color` should be non-premultiplied. void pl_frame_clear_rgba(pl_gpu gpu, const struct pl_frame *frame, const float clear_color[4]); // Like `pl_frame_clear_rgba` but without an alpha channel. static inline void pl_frame_clear(pl_gpu gpu, const struct pl_frame *frame, const float clear_color[3]) { const float clear_color_rgba[4] = { clear_color[0], clear_color[1], clear_color[2], 1.0 }; pl_frame_clear_rgba(gpu, frame, clear_color_rgba); } // Render a single image to a target using the given parameters. This is // fully dynamic, i.e. the params can change at any time. libplacebo will // internally detect and flush whatever caches are invalidated as a result of // changing colorspace, size etc. // // Required plane capabilities: // - Planes in `image` must be `sampleable` // - Planes in `target` must be `renderable` // // Recommended plane capabilities: (Optional, but good for performance) // - Planes in `image` should have `sample_mode` PL_TEX_SAMPLE_LINEAR // - Planes in `target` should be `storable` // - Planes in `target` should have `blit_dst` // // Note on lifetime: Once this call returns, the passed structures may be // freely overwritten or discarded by the caller, even the referenced // `pl_tex` objects may be freely reused. // // Note on overlays: `image.overlays` will be rendered directly onto the image, // which means they get affected by things like scaling and frame mixing. // `target.overlays` will also be rendered, but directly onto the target. They // don't even need to be inside `target.crop`. // // Note: `image` may be NULL, in which case `target.overlays` will still be // rendered, but nothing else. bool pl_render_image(pl_renderer rr, const struct pl_frame *image, const struct pl_frame *target, const struct pl_render_params *params); // Flushes the internal state of this renderer. 
This is normally not needed, // even if the image parameters, colorspace or target configuration change, // since libplacebo will internally detect such circumstances and recreate // outdated resources automatically. Doing this explicitly *may* be useful to // purge some state related to things like HDR peak detection or frame mixing, // so calling it is a good idea if the content source is expected to change // dramatically (e.g. when switching to a different file). void pl_renderer_flush_cache(pl_renderer rr); // Represents a mixture of input frames, distributed temporally. // // NOTE: Frames must be sorted by timestamp, i.e. `timestamps` must be // monotonically increasing. struct pl_frame_mix { // The number of frames in this mixture. The number of frames should be // sufficient to meet the needs of the configured frame mixer. See the // section below for more information. // // If the number of frames is 0, this call will be equivalent to // `pl_render_image` with `image == NULL`. int num_frames; // A list of the frames themselves. The frames can have different // colorspaces, configurations of planes, or even sizes. // // Note: This is a list of pointers, to avoid users having to copy // around `pl_frame` structs when re-organizing this array. const struct pl_frame **frames; // A list of unique signatures, one for each frame. These are used to // identify frames across calls to this function, so it's crucial that they // be both unique per-frame but also stable across invocations of // `pl_render_image_mix`. const uint64_t *signatures; // A list of relative timestamps for each frame. These are relative to the // time of the vsync being drawn, i.e. this function will render the frame // that will be made visible at timestamp 0.0. The values are expected to // be normalized such that a separation of 1.0 corresponds to roughly one // nominal source frame duration. So a constant framerate video file will // always have timestamps like e.g. {-2.3, -1.3, -0.3, 0.7, 1.7, 2.7}, // using an example radius of 3. // // In cases where the framerate is variable (e.g. VFR video), the choice of // what scale to use can be difficult to answer. A typical choice would // be either to use the canonical (container-tagged) framerate, or the // highest momentary framerate, as a reference. If all else fails, you // could also use the display's framerate. // // Note: This function assumes zero-order-hold semantics, i.e. the frame at // timestamp 0.7 is intended to remain visible until timestamp 1.7, when // the next frame replaces it. const float *timestamps; // The duration for which the vsync being drawn will be held, using the // same scale as `timestamps`. If the display has an unknown or variable // frame-rate (e.g. Adaptive Sync), then you're probably better off not // using this function and instead just painting the frames directly using // `pl_render_image` at the correct PTS. // // As an example, if `vsync_duration` is 0.4, then it's assumed that the // vsync being painted is visible for the period [0.0, 0.4]. float vsync_duration; // Explanation of the frame mixing radius: The algorithm chosen in // `pl_render_params.frame_mixer` has a canonical radius equal to // `pl_filter_config.kernel->radius`. This means that the frame mixing // algorithm will (only) need to consult all of the frames that have a // distance within the interval [-radius, radius]. As such, the user should // include all such frames in `frames`, but may prune or omit frames that // lie outside it.
// // The built-in frame mixing (`pl_render_params.frame_mixer == NULL`) has // no concept of radius, it just always needs access to the "current" and // "next" frames. }; // Helper function to calculate the frame mixing radius. static inline float pl_frame_mix_radius(const struct pl_render_params *params) { // For backwards compatibility, allow !frame_mixer->kernel if (!params->frame_mixer || !params->frame_mixer->kernel) return 0.0; return params->frame_mixer->kernel->radius; } // Render a mixture of images to the target using the given parameters. This // functions much like a generalization of `pl_render_image`, for when the API // user has more control over the frame queue / vsync loop, and can provide a // few frames from the past and future + timestamp information. // // This allows libplacebo to perform rudimentary frame mixing / interpolation, // in order to eliminate judder artifacts typically associated with // source/display frame rate mismatch. bool pl_render_image_mix(pl_renderer rr, const struct pl_frame_mix *images, const struct pl_frame *target, const struct pl_render_params *params); PL_API_END #endif // LIBPLACEBO_RENDERER_H_ libplacebo-v4.192.1/src/include/libplacebo/shaders.h000066400000000000000000000255751417677245700223240ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_H_ #define LIBPLACEBO_SHADERS_H_ // This function defines the "direct" interface to libplacebo's GLSL shaders, // suitable for use in contexts where the user controls GLSL shader compilation // but wishes to include functions generated by libplacebo as part of their // own rendering process. This API is normally not used for operation with // libplacebo's higher-level constructs such as `pl_dispatch` or `pl_renderer`. #include PL_API_BEGIN // Thread-safety: Unsafe typedef PL_STRUCT(pl_shader) *pl_shader; struct pl_shader_params { // The `id` represents an abstract identifier for the shader, to avoid // collisions with other shaders being used as part of the same larger, // overarching shader. This is relevant for users which want to combine // multiple `pl_shader` objects together, in which case all `pl_shader` // objects should have a unique `id`. uint8_t id; // If `gpu` is non-NULL, then this `gpu` will be used to create objects // such as textures and buffers, or check for required capabilities, for // operations which depend on either of those. This is fully optional, i.e. // these GLSL primitives are designed to be used without a dependency on // `gpu` wherever possible - however, some features may not work, and will // be disabled even if requested. pl_gpu gpu; // The `index` represents an abstract frame index, which shaders may use // internally to do things like temporal dithering or seeding PRNGs. If the // user does not care about temporal dithering/debanding, or wants // deterministic rendering, this may safely be left as 0. 
Otherwise, it // should be incremented by 1 on successive frames. uint8_t index; // If `glsl.version` is nonzero, then this structure will be used to // determine the effective GLSL mode and capabilities. If `gpu` is also // set, then this overrides `gpu->glsl`. struct pl_glsl_version glsl; // If this is true, all constants in the shader will be replaced by // dynamic variables. This is mainly useful to avoid recompilation for // shaders which expect to have their values change constantly. bool dynamic_constants; }; #define pl_shader_params(...) (&(struct pl_shader_params) { __VA_ARGS__ }) // Creates a new, blank, mutable pl_shader object. // // Note: Rather than allocating and destroying many shaders, users are // encouraged to reuse them (using `pl_shader_reset`) for efficiency. pl_shader pl_shader_alloc(pl_log log, const struct pl_shader_params *params); // Frees a pl_shader and all resources associated with it. void pl_shader_free(pl_shader *sh); // Resets a pl_shader to a blank slate, without releasing internal memory. // If you're going to be re-generating shaders often, this function will let // you skip the re-allocation overhead. void pl_shader_reset(pl_shader sh, const struct pl_shader_params *params); // Returns whether or not a shader is in a "failed" state. Trying to modify a // shader in illegal ways (e.g. signature mismatch) will result in the shader // being marked as "failed". Since most pl_shader_ operations have a void // return type, the user can use this function to figure out whether a specific // shader operation has failed or not. This function is somewhat redundant // since `pl_shader_finalize` will also return NULL in this case. bool pl_shader_is_failed(const pl_shader sh); // Returns whether or not a pl_shader needs to be run as a compute shader. This // will never be the case unless the `pl_glsl_version` this `pl_shader` was // created using has `compute` support enabled. bool pl_shader_is_compute(const pl_shader sh); // Returns whether or not the shader has any particular output size // requirements. Some shaders, in particular those that sample from other // textures, have specific output size requirements which need to be respected // by the caller. If this is false, then the shader is compatible with every // output size. If true, the size requirements are stored into *w and *h. bool pl_shader_output_size(const pl_shader sh, int *w, int *h); // Indicates the type of signature that is associated with a shader result. // Every shader result defines a function that may be called by the user, and // this enum indicates the type of value that this function takes and/or // returns. // // Which signature a shader ends up with depends on the type of operation being // performed by a shader fragment, as determined by the user's calls. See below // for more information. enum pl_shader_sig { PL_SHADER_SIG_NONE = 0, // no input / void output PL_SHADER_SIG_COLOR, // vec4 color (normalized so that 1.0 is the ref white) // The following are only valid as input signatures: PL_SHADER_SIG_SAMPLER, // (gsampler* src_tex, vecN tex_coord) pair, // specifics depend on how the shader was generated }; // Represents a finalized shader fragment. This is not a complete shader, but a // collection of raw shader text together with description of the input // attributes, variables and vertices it expects to be available. struct pl_shader_res { // A copy of the parameters used to create the shader.
struct pl_shader_params params; // A list of friendly names for the semantic operations being performed by // this shader, e.g. "color decoding" or "debanding". const char **steps; int num_steps; // As a convenience, this contains a pretty-printed version of the // above list, with entries tallied and separated by commas const char *description; // The shader text, as literal GLSL. This will always be a function // definition, such that the function with the indicated name and // signature may be called by the user. const char *glsl; const char *name; enum pl_shader_sig input; // what the function expects enum pl_shader_sig output; // what the function returns // For compute shaders (pl_shader_is_compute), this indicates the requested // work group size. Otherwise, both fields are 0. The interpretation of // these work groups is that they're tiled across the output image. int compute_group_size[2]; // If this pass is a compute shader, this field indicates the shared memory // size requirements for this shader pass. size_t compute_shmem; // A set of input vertex attributes needed by this shader fragment. const struct pl_shader_va *vertex_attribs; int num_vertex_attribs; // A set of input variables needed by this shader fragment. const struct pl_shader_var *variables; int num_variables; // A list of input descriptors needed by this shader fragment. const struct pl_shader_desc *descriptors; int num_descriptors; // A list of compile-time constants used by this shader fragment. const struct pl_shader_const *constants; int num_constants; }; // Represents a vertex attribute. The four values will be bound to the four // corner vertices respectively, in row-wise order starting from the top left: // data[0] data[1] // data[2] data[3] struct pl_shader_va { struct pl_vertex_attrib attr; // VA type, excluding `offset` and `location` const void *data[4]; }; // Represents a bound shared variable / descriptor struct pl_shader_var { struct pl_var var; // the underlying variable description const void *data; // the raw data (as per `pl_var_host_layout`) bool dynamic; // if true, the value is expected to change frequently }; struct pl_buffer_var { struct pl_var var; struct pl_var_layout layout; }; typedef uint16_t pl_memory_qualifiers; enum { PL_MEMORY_COHERENT = 1 << 0, // supports synchronization across shader invocations PL_MEMORY_VOLATILE = 1 << 1, // all writes are synchronized automatically // Note: All descriptors are also implicitly assumed to have the 'restrict' // memory qualifier. There is currently no way to override this behavior. }; struct pl_shader_desc { struct pl_desc desc; // descriptor type, excluding `int binding` struct pl_desc_binding binding; // contents of the descriptor binding // For PL_DESC_BUF_UNIFORM/STORAGE, this specifies the layout of the // variables contained by a buffer. Ignored for the other descriptor types struct pl_buffer_var *buffer_vars; int num_buffer_vars; // For storage images and buffers, this specifies additional memory // qualifiers on the descriptor. It's highly recommended to always use // at least PL_MEMORY_RESTRICT. Ignored for other descriptor types. pl_memory_qualifiers memory; }; // Represents a compile-time constant. This can be lowered to a specialization // constant to support cheaper recompilations. struct pl_shader_const { enum pl_var_type type; const char *name; const void *data; // If true, this constant *must* be a compile-time constant, which // basically just overrides `pl_shader_params.dynamic_constants`.
Useful // for constants which will serve as inputs to e.g. array sizes. bool compile_time; }; // Finalize a pl_shader. It is no longer mutable at this point, and any further // attempts to modify it result in an error. (Functions which take a `const // pl_shader` argument do not modify the shader and may be freely // called on an already-finalized shader) // // The returned pl_shader_res is bound to the lifetime of the pl_shader - and // will only remain valid until the pl_shader is freed or reset. This function // may be called multiple times, and will produce the same result each time. // // This function will return NULL if the shader is considered to be in a // "failed" state (see pl_shader_is_failed). const struct pl_shader_res *pl_shader_finalize(pl_shader sh); // Shader objects represent abstract resources that shaders need to manage in // order to ensure their operation. This could include shader storage buffers, // generated lookup textures, or other sorts of configured state. The body // of a shader object is fully opaque; but the user is in charge of cleaning up // after them and passing them to the right shader passes. // // Note: pl_shader_obj objects must be initialized to NULL by the caller. typedef PL_STRUCT(pl_shader_obj) *pl_shader_obj; void pl_shader_obj_destroy(pl_shader_obj *obj); PL_API_END #endif // LIBPLACEBO_SHADERS_H_ libplacebo-v4.192.1/src/include/libplacebo/shaders/000077500000000000000000000000001417677245700221355ustar00rootroot00000000000000libplacebo-v4.192.1/src/include/libplacebo/shaders/colorspace.h000066400000000000000000000446341417677245700244510ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_COLORSPACE_H_ #define LIBPLACEBO_SHADERS_COLORSPACE_H_ // Color space transformation shaders. These all input and output a color // value (PL_SHADER_SIG_COLOR). #include #include #include PL_API_BEGIN // Transform the input color, in its given representation, to ensure // compatibility with the indicated alpha mode. Mutates `repr` to reflect the // change. Note that this is a no-op if the input is PL_ALPHA_UNKNOWN. void pl_shader_set_alpha(pl_shader sh, struct pl_color_repr *repr, enum pl_alpha_mode mode); // Colorspace reshaping for PL_COLOR_SYSTEM_DOLBYVISION. Note that this is done // automatically by `pl_shader_decode_color` for PL_COLOR_SYSTEM_DOLBYVISION. void pl_shader_dovi_reshape(pl_shader sh, const struct pl_dovi_metadata *data); // Decode the color into normalized RGB, given a specified color_repr. This // also takes care of additional pre- and post-conversions required for the // "special" color systems (XYZ, BT.2020-C, etc.). If `params` is left as NULL, // it defaults to &pl_color_adjustment_neutral. // // Note: This function always returns PC-range RGB with independent alpha. // It mutates the pl_color_repr to reflect the change.
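//
// As a rough, illustrative sketch (not part of the API): a caller that wants
// to process colors in normalized RGB might decode, process and re-encode as
// follows. Here `sh`, the hypothetical input representation `image_repr` and
// the hypothetical output representation `target_repr` are assumed to be
// provided by the caller:
//
//   struct pl_color_repr repr = image_repr;   /* local copy; gets mutated */
//   pl_shader_decode_color(sh, &repr, NULL);  /* NULL = neutral adjustments */
//   /* ... operate on the normalized, PC-range RGB color here ... */
//   pl_shader_encode_color(sh, &target_repr);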
void pl_shader_decode_color(pl_shader sh, struct pl_color_repr *repr, const struct pl_color_adjustment *params); // Encodes a color from normalized, PC-range, independent alpha RGB into a // given representation. That is, this performs the inverse operation of // `pl_shader_decode_color` (sans color adjustments). void pl_shader_encode_color(pl_shader sh, const struct pl_color_repr *repr); // Linearize (expand) `vec4 color`, given a specified color space. In essence, // this corresponds to the ITU-R EOTF. // // Note: Unlike the ITU-R EOTF, it never includes the OOTF - even for systems // where the EOTF includes the OOTF (such as HLG). void pl_shader_linearize(pl_shader sh, const struct pl_color_space *csp); // Delinearize (compress), given a color space as output. This loosely // corresponds to the inverse EOTF (not the OETF) in ITU-R terminology, again // assuming a reference monitor. void pl_shader_delinearize(pl_shader sh, const struct pl_color_space *csp); struct pl_sigmoid_params { // The center (bias) of the sigmoid curve. Must be between 0.0 and 1.0. // If left as 0.0, defaults to 0.75 float center; // The slope (steepness) of the sigmoid curve. Must be between 1.0 and 20.0. // If left as 0.0, defaults to 6.5. float slope; }; #define PL_SIGMOID_DEFAULTS \ .center = 0.75, \ .slope = 6.50, #define pl_sigmoid_params(...) (&(struct pl_sigmoid_params) { PL_SIGMOID_DEFAULTS __VA_ARGS__ }) extern const struct pl_sigmoid_params pl_sigmoid_default_params; // Applies a sigmoidal color transform to all channels. This helps avoid // ringing artifacts during upscaling by bringing the color information closer // to neutral and away from the extremes. If `params` is NULL, it defaults to // &pl_sigmoid_default_params. // // Warning: This function clamps the input to the interval [0,1]; and as such // it should *NOT* be used on already-decoded high-dynamic range content. void pl_shader_sigmoidize(pl_shader sh, const struct pl_sigmoid_params *params); // This performs the inverse operation to `pl_shader_sigmoidize`. void pl_shader_unsigmoidize(pl_shader sh, const struct pl_sigmoid_params *params); struct pl_peak_detect_params { // Smoothing coefficient for the detected values. This controls the time // parameter (tau) of an IIR low pass filter. In other words, it represents // the cutoff period (= 1 / cutoff frequency) in frames. Frequencies below // this length will be suppressed. This helps block out annoying // "sparkling" or "flickering" due to small variations in frame-to-frame // brightness. // // If left unset, this defaults to 100.0. float smoothing_period; // In order to avoid reacting sluggishly on scene changes as a result of // the low-pass filter, we disable it when the difference between the // current frame brightness and the average frame brightness exceeds a // given threshold difference. But rather than a single hard cutoff, which // would lead to weird discontinuities on fades, we gradually disable it // over a small window of brightness ranges. These parameters control the // lower and upper bounds of this window, in dB. // // The default values are 5.5 and 10.0, respectively. To disable this logic // entirely, set either one to a negative value. float scene_threshold_low; float scene_threshold_high; // In order to avoid clipping on fade-ins or other sudden brightness // increases, we always over-estimate the peak brightness (in percent) // by this amount, as a percentage of the actual measured peak. If left // as 0.0, this logic is disabled. The default value is 0.05.
float overshoot_margin; // To avoid over-tone-mapping very dark scenes (or black frames), this // imposes a hard lower bound on the detected peak. If left as 0.0, it // instead defaults to a value of 1.0. float minimum_peak; }; #define PL_PEAK_DETECT_DEFAULTS \ .smoothing_period = 100.0, \ .scene_threshold_low = 5.5, \ .scene_threshold_high = 10.0, \ .overshoot_margin = 0.05, \ .minimum_peak = 1.0, #define pl_peak_detect_params(...) (&(struct pl_peak_detect_params) { PL_PEAK_DETECT_DEFAULTS __VA_ARGS__ }) extern const struct pl_peak_detect_params pl_peak_detect_default_params; // This function can be used to measure the CLL and FALL of a video // source automatically, using a compute shader. The measured values are // smoothed automatically (depending on the parameters), so to keep track of // the measured results over time, a tone mapping shader state object is used // to hold the state. Returns false on failure initializing the tone mapping // object, or if compute shaders are not supported. // // It's important that the same shader object is used for successive frames // belonging to the same source. If the source changes (e.g. due to a file // change or seek), the user should reset it with `pl_reset_detected_peak` (or // destroy it and use a new state object). // // The parameter `csp` holds the representation of the color values that are // the input to this function. (They must already be in decoded RGB form, i.e. // alternate color representations are not supported) bool pl_shader_detect_peak(pl_shader sh, struct pl_color_space csp, pl_shader_obj *state, const struct pl_peak_detect_params *params); // After dispatching the above shader, this function *may* be used to read out // the detected CLL and FALL directly (in PL_HDR_NORM units). If the shader // has never been dispatched yet, i.e. no information is available, this will // return false. // // Note: This function will block until the shader object is no longer in use // by the GPU, so its use should be avoided due to performance reasons. This // function is *not* needed when the user only wants to use `pl_shader_color_map`, // since that can ingest the results from the state object directly. It only // serves as a utility/debugging function. bool pl_get_detected_peak(const pl_shader_obj state, float *out_cll, float *out_fall); // Resets the peak detection state in a given tone mapping state object. This // is not equal to `pl_shader_obj_destroy`, because it does not destroy any // state used by `pl_shader_tone_map`. void pl_reset_detected_peak(pl_shader_obj state); // Deprecated. See for replacements. enum pl_tone_mapping_algorithm { PL_TONE_MAPPING_CLIP, PL_TONE_MAPPING_MOBIUS, PL_TONE_MAPPING_REINHARD, PL_TONE_MAPPING_HABLE, PL_TONE_MAPPING_GAMMA, PL_TONE_MAPPING_LINEAR, PL_TONE_MAPPING_BT_2390, PL_TONE_MAPPING_ALGORITHM_COUNT, }; enum pl_tone_map_mode { // Picks the best tone-mapping mode based on internal heuristics. PL_TONE_MAP_AUTO, // Per-channel tone-mapping in RGB. Guarantees no clipping and heavily // desaturates the output, but distorts the colors quite significantly. PL_TONE_MAP_RGB, // Tone-mapping is performed on the brightest component found in the // signal. Good at preserving details in highlights, but has a tendency to // crush blacks. PL_TONE_MAP_MAX, // Tone-map per-channel for highlights and linearly (luma-based) for // midtones/shadows, based on a fixed gamma 2.4 coefficient curve. 
PL_TONE_MAP_HYBRID, // Tone-map linearly on the luma component, and adjust (desaturate) the // chromaticities to compensate using a simple constant factor. This is // essentially the mode used in ITU-R BT.2446 method A. PL_TONE_MAP_LUMA, PL_TONE_MAP_MODE_COUNT, }; enum pl_gamut_mode { // Do nothing, simply clip out-of-range colors to the RGB volume. PL_GAMUT_CLIP, // Equal to PL_GAMUT_CLIP but also highlights out-of-gamut colors (by // coloring them pink). PL_GAMUT_WARN, // Linearly reduces content brightness to preserve saturated details, // followed by clipping the remaining out-of-gamut colors. As the name // implies, this makes everything darker, but provides a good balance // between preserving details and colors. PL_GAMUT_DARKEN, // Hard-desaturates out-of-gamut colors towards white, while preserving the // luminance. Has a tendency to shift colors. PL_GAMUT_DESATURATE, PL_GAMUT_MODE_COUNT, }; struct pl_color_map_params { // The rendering intent to use for gamut mapping. Note that this does not // affect tone mapping, which is always applied independently (to get the // equivalent of colorimetric intent for tone mapping, set the function to // NULL). // // Defaults to PL_INTENT_RELATIVE_COLORIMETRIC enum pl_rendering_intent intent; // How to handle out-of-gamut colors when changing the content primaries. enum pl_gamut_mode gamut_mode; // Function and configuration used for tone-mapping. For non-tunable // functions, the `param` is ignored. If the tone mapping parameter is // left as 0.0, the tone-mapping curve's preferred default parameter will // be used. The default function is pl_tone_map_auto. // // Note: Changing this pointer invalidates the LUT, so make sure to only // use stable (or static) storage for the pl_tone_map_function. const struct pl_tone_map_function *tone_mapping_function; enum pl_tone_map_mode tone_mapping_mode; float tone_mapping_param; // If true, and supported by the given tone mapping function, libplacebo // will perform inverse tone mapping to expand the dynamic range of a // signal. libplacebo is not liable for any HDR-induced eye damage. bool inverse_tone_mapping; // Extra crosstalk factor to apply before tone-mapping. Optional. May help // to improve the appearance of very bright, monochromatic highlights. float tone_mapping_crosstalk; // Tone mapping LUT size. Defaults to 256. Note that when combining // this with peak detection, the resulting LUT is actually squared, so // avoid setting it too high. int lut_size; // --- Debugging options // Force the use of a full tone-mapping LUT even for functions that have // faster pure GLSL replacements (e.g. clip). bool force_tone_mapping_lut; // --- Deprecated fields enum pl_tone_mapping_algorithm tone_mapping_algo PL_DEPRECATED; float desaturation_strength PL_DEPRECATED; float desaturation_exponent PL_DEPRECATED; float desaturation_base PL_DEPRECATED; float max_boost PL_DEPRECATED; bool gamut_warning PL_DEPRECATED; // replaced by PL_GAMUT_WARN bool gamut_clipping PL_DEPRECATED; // replaced by PL_GAMUT_DESATURATE }; #define PL_COLOR_MAP_DEFAULTS \ .intent = PL_INTENT_RELATIVE_COLORIMETRIC, \ .gamut_mode = PL_GAMUT_DARKEN, \ .tone_mapping_function = &pl_tone_map_auto, \ .tone_mapping_mode = PL_TONE_MAP_AUTO, \ .tone_mapping_crosstalk = 0.04, \ .lut_size = 256, #define pl_color_map_params(...)
(&(struct pl_color_map_params) { PL_COLOR_MAP_DEFAULTS __VA_ARGS__ }) extern const struct pl_color_map_params pl_color_map_default_params; // Maps `vec4 color` from one color space to another color space according // to the parameters (described in greater depth above). If `params` is left // as NULL, it defaults to `&pl_color_map_default_params`. If `prelinearized` // is true, the logic will assume the input has already been linearized by the // caller (e.g. as part of a previous linear light scaling operation). // // `tone_mapping_state` is required if tone mapping is desired, and will be // used to store state related to tone mapping. Note that this is the same // state object used by the peak detection shader (`pl_shader_detect_peak`). If // that function has been called on the same state object before this one, the // detected values may be used to guide the tone mapping algorithm. // // Note: The peak detection state object is only updated after the shader is // dispatched, so if `pl_shader_detect_peak` is called as part of the same // shader as `pl_shader_color_map`, the results will end up delayed by one // frame. If frame-level accuracy is desired, then users should call // `pl_shader_detect_peak` separately and dispatch the resulting shader // *before* dispatching this one. void pl_shader_color_map(pl_shader sh, const struct pl_color_map_params *params, struct pl_color_space src, struct pl_color_space dst, pl_shader_obj *tone_mapping_state, bool prelinearized); // Applies a set of cone distortion parameters to `vec4 color` in a given color // space. This can be used to simulate color blindness. See `pl_cone_params` // for more information. void pl_shader_cone_distort(pl_shader sh, struct pl_color_space csp, const struct pl_cone_params *params); enum pl_dither_method { // Dither with blue noise. Very high quality, but requires the use of a // LUT. Warning: Computing a blue noise texture with a large size can be // very slow, however this only needs to be performed once. Even so, using // this with a `lut_size` greater than 6 is generally ill-advised. This is // the preferred/default dither method. PL_DITHER_BLUE_NOISE, // Dither with an ordered (bayer) dither matrix, using a LUT. Low quality, // and since this also uses a LUT, there's generally no advantage to picking // this instead of `PL_DITHER_BLUE_NOISE`. It's mainly there for testing. PL_DITHER_ORDERED_LUT, // The same as `PL_DITHER_ORDERED_LUT`, but uses fixed function math instead // of a LUT. This is faster, but only supports a fixed dither matrix size // of 16x16 (equal to a `lut_size` of 4). Requires GLSL 130+. PL_DITHER_ORDERED_FIXED, // Dither with white noise. This does not require a LUT and is fairly cheap // to compute. Unlike the other modes it doesn't show any repeating // patterns either spatially or temporally, but the downside is that this // is visually fairly jarring due to the presence of low frequencies in the // noise spectrum. Used as a fallback when the above methods are not // available. PL_DITHER_WHITE_NOISE, PL_DITHER_METHOD_COUNT, }; struct pl_dither_params { // The source of the dither noise to use. enum pl_dither_method method; // For the dither methods which require the use of a LUT, this controls // the size of the LUT (base 2). If left as NULL, this defaults to 6, which // is equivalent to a 64x64 dither matrix. Must not be larger than 8. int lut_size; // Enables temporal dithering. This reduces the persistence of dithering // artifacts by perturbing the dithering matrix per frame. 
// Warning: This can cause nasty aliasing artifacts on some LCD screens. bool temporal; }; #define PL_DITHER_DEFAULTS \ .method = PL_DITHER_BLUE_NOISE, \ .lut_size = 6, \ /* temporal dithering commonly flickers on LCDs */ \ .temporal = false, #define pl_dither_params(...) (&(struct pl_dither_params) { PL_DITHER_DEFAULTS __VA_ARGS__ }) extern const struct pl_dither_params pl_dither_default_params; // Dither the colors to a lower depth, given in bits. This can be used on input // colors of any precision. Basically, this rounds the colors to only linear // multiples of the stated bit depth. The average intensity of the result // will not change (i.e., the dither noise is balanced in both directions). // If `params` is NULL, it defaults to &pl_dither_default_params. // // For the dither methods which require the use of a LUT, `dither_state` must // be set to a valid pointer. To avoid thrashing the resource, users should // avoid trying to re-use the same LUT for different dither configurations. If // passed as NULL, libplacebo will automatically fall back to dither algorithms // that don't require the use of a LUT. // // Warning: This dithering algorithm is not gamma-invariant; so using it for // very low bit depths (below 4 or so) will noticeably increase the brightness // of the resulting image. When doing low bit depth dithering for aesthetic // purposes, it's recommended that the user explicitly (de)linearize the colors // before and after this algorithm. void pl_shader_dither(pl_shader sh, int new_depth, pl_shader_obj *dither_state, const struct pl_dither_params *params); PL_API_END #endif // LIBPLACEBO_SHADERS_COLORSPACE_H_ libplacebo-v4.192.1/src/include/libplacebo/shaders/custom.h000066400000000000000000000265501417677245700236300ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_CUSTOM_H_ #define LIBPLACEBO_SHADERS_CUSTOM_H_ #include // Functions for writing custom shaders and hooking them into the `pl_renderer` // pipeline, as well as compatibility functions for parsing shaders in mpv // format. #include #include #include PL_API_BEGIN // Parameters describing custom shader text to be embedded into a `pl_shader` // object. All of the strings are optional and can be left as NULL, but without // a `body` in particular, the shader will do nothing useful on its own. struct pl_custom_shader { // The prelude contains text such as extra #defines, #extension pragmas, // or other parts of the shader that must be placed at the very // beginning (before input layout declarations etc.) // // Note: #extension pragmas do not need to be emitted to enable support for // resource types already attached to the shader (e.g. SSBOs), compute // shaders, or GPU capabilities known to libplacebo (e.g. subgroups). 
const char *prelude; // The header contains text such as helper function definitions, extra // uniforms, shared memory variables or buffer descriptions. const char *header; // A friendly name for the shader. (Optional) const char *description; // The "primary" GLSL code. This will be effectively appended to the "main" // function. It lives in an environment given by the `input` signature, and // is expected to return results in a way given by the `output` signature. // // Note: In the case of PL_SHADER_SIG_COLOR, the output `vec4 color` is // allocated by `pl_shader_custom`, the user merely needs to assign to it. // // Note: For ease of development it can be useful to have the main logic // live inside a helper function defined as part of `header`, and specify // the `body` as a single line that simply calls the helper function. const char *body; enum pl_shader_sig input; enum pl_shader_sig output; // Extra descriptors, variables and vertex attributes to attach to the // resulting `pl_shader_res`. const struct pl_shader_desc *descriptors; int num_descriptors; const struct pl_shader_var *variables; int num_variables; const struct pl_shader_va *vertex_attribs; int num_vertex_attribs; const struct pl_shader_const *constants; int num_constants; // If true, this shader must be a compute shader. The desired workgroup // size and shared memory usage can be optionally specified, or 0 if no // specific work group size or shared memory size restrictions apply. // // See also: `pl_shader_res.compute_group_size` bool compute; size_t compute_shmem; int compute_group_size[2]; // Fixes the output size requirements of the shader to exact dimensions. // Optional, if left as 0, means the shader can be dispatched at any size. int output_w; int output_h; }; // Append custom shader code, including extra descriptors and variables, to an // existing `pl_shader` object. Returns whether successful. This function may // fail in the event that e.g. the custom shader requires compute shaders on // an unsupported GPU, or exceeds the GPU's shared memory capabilities. bool pl_shader_custom(pl_shader sh, const struct pl_custom_shader *params); // Which "rendering stages" are available for user shader hooking purposes. // Except where otherwise noted, all stages are "non-resizable", i.e. the // shaders already have specific output size requirements. enum pl_hook_stage { // Hook stages for the untouched planes, as made available by the source. // These are all resizable, i.e. there are no specific output stage // requirements. 
PL_HOOK_RGB_INPUT = 1 << 0, PL_HOOK_LUMA_INPUT = 1 << 1, PL_HOOK_CHROMA_INPUT = 1 << 2, PL_HOOK_ALPHA_INPUT = 1 << 3, PL_HOOK_XYZ_INPUT = 1 << 4, // Hook stages for the scaled/aligned planes PL_HOOK_CHROMA_SCALED = 1 << 5, PL_HOOK_ALPHA_SCALED = 1 << 6, PL_HOOK_NATIVE = 1 << 7, // Combined image in its native color space PL_HOOK_RGB = 1 << 8, // After conversion to RGB (resizable) PL_HOOK_LINEAR = 1 << 9, // After linearization but before scaling PL_HOOK_SIGMOID = 1 << 10, // After sigmoidization PL_HOOK_PRE_OVERLAY = 1 << 11, // Before applying on-image overlays PL_HOOK_PRE_KERNEL = 1 << 12, // Immediately before the main scaler kernel (after overlays) PL_HOOK_POST_KERNEL = 1 << 13, // Immediately after the main scaler kernel PL_HOOK_SCALED = 1 << 14, // After scaling, before color management PL_HOOK_OUTPUT = 1 << 15, // After color management, before dithering }; // Returns true if a given hook stage is resizable static inline bool pl_hook_stage_resizable(enum pl_hook_stage stage) { switch (stage) { case PL_HOOK_RGB_INPUT: case PL_HOOK_LUMA_INPUT: case PL_HOOK_CHROMA_INPUT: case PL_HOOK_ALPHA_INPUT: case PL_HOOK_XYZ_INPUT: case PL_HOOK_NATIVE: case PL_HOOK_RGB: return true; case PL_HOOK_CHROMA_SCALED: case PL_HOOK_ALPHA_SCALED: case PL_HOOK_LINEAR: case PL_HOOK_SIGMOID: case PL_HOOK_PRE_OVERLAY: case PL_HOOK_PRE_KERNEL: case PL_HOOK_POST_KERNEL: case PL_HOOK_SCALED: case PL_HOOK_OUTPUT: return false; } abort(); } // The different forms of communicating image data between the renderer and // the hooks enum pl_hook_sig { PL_HOOK_SIG_NONE, // No data is passed, no data is received/returned PL_HOOK_SIG_COLOR, // `vec4 color` already pre-sampled in a `pl_shader` PL_HOOK_SIG_TEX, // `pl_tex` containing the image data PL_HOOK_SIG_COUNT, }; struct pl_hook_params { // GPU objects associated with the `pl_renderer`, which the user may // use for their own purposes. pl_gpu gpu; pl_dispatch dispatch; // Helper function to fetch a new temporary texture, using renderer-backed // storage. This is guaranteed to have sane image usage requirements and a // 16-bit or floating point format. The user does not need to free/destroy // this texture in any way. May return NULL. pl_tex (*get_tex)(void *priv, int width, int height); void *priv; // Which stage triggered the hook to run. enum pl_hook_stage stage; // For `PL_HOOK_SIG_COLOR`, this contains the existing shader object with // the color already pre-sampled into `vec4 color`. The user may modify // this as much as they want, as long as they don't dispatch/finalize/reset // it. // // Note that this shader might have specific output size requirements, // depending on the exact shader stage hooked by the user, and may already // be a compute shader. pl_shader sh; // For `PL_HOOK_SIG_TEX`, this contains the texture that the user should // sample from. // // Note: This texture object is owned by the renderer, and users must not // modify its contents. It will not be touched for the duration of a frame, // but the contents are lost in between frames. pl_tex tex; // The effective current rectangle of the image we're rendering in this // shader, i.e. the effective rect of the content we're interested in, // as a crop of either `sh` or `tex` (depending on the signature). // // Note: This is still set even for `PL_HOOK_SIG_NONE`! struct pl_rect2df rect; // The current effective colorspace and representation, of either the // pre-sampled color (in `sh`), or the contents of `tex`, respectively. // // Note: This is still set even for `PL_HOOK_SIG_NONE`! 
struct pl_color_repr repr; struct pl_color_space color; int components; // The (cropped) source and destination rectangles of the overall // rendering. These are functionally equivalent to `image.crop` and // `target.crop`, respectively, but `src_rect` in particular may change as // a result of previous hooks being executed. (e.g. prescalers) struct pl_rect2df src_rect; struct pl_rect2d dst_rect; }; struct pl_hook_res { // If true, the hook is assumed to have "failed" or errored in some way, // and all other fields are ignored. bool failed; // What type of output this hook is returning. // Note: If this is `PL_HOOK_SIG_NONE`, all other fields are ignored. enum pl_hook_sig output; // For `PL_HOOK_SIG_COLOR`, this *must* be set to a valid `pl_shader` // object containing the sampled color value (i.e. with an output signature // of `PL_SHADER_SIG_COLOR`), and *should* be allocated from the given // `pl_dispatch` object. Ignored otherwise. pl_shader sh; // For `PL_HOOK_SIG_TEX`, this *must* contain the texture object containing // the result of rendering the hook. This *should* be a texture allocated // using the given `get_tex` callback, to ensure the format and texture // usage flags are compatible with what the renderer expects. pl_tex tex; // For shaders that return some sort of output, this contains the // new/altered versions of the existing "current texture" metadata. struct pl_color_repr repr; struct pl_color_space color; int components; // This contains the new effective rect of the contents. This may be // different from the original `rect` for resizable passes. Ignored for // non-resizable passes. struct pl_rect2df rect; }; // Struct describing a hook. // // Note: Users may freely create their own instances of this struct, there is // nothing particularly special about `pl_mpv_user_shader_parse`. struct pl_hook { enum pl_hook_stage stages; // Which stages to hook on enum pl_hook_sig input; // Which input signature this hook expects void *priv; // Arbitrary user context // Called at the beginning of passes, to reset/initialize the hook. (Optional) void (*reset)(void *priv); // The hook function itself. Called by the renderer at any of the indicated // hook stages. See `pl_hook_res` for more info on the return values. struct pl_hook_res (*hook)(void *priv, const struct pl_hook_params *params); }; // Compatibility layer with `mpv` user shaders. See the mpv man page for more // information on the format. Will return `NULL` if the shader fails parsing. // // The resulting `pl_hook` objects should be destroyed with the corresponding // destructor when no longer needed. const struct pl_hook *pl_mpv_user_shader_parse(pl_gpu gpu, const char *shader_text, size_t shader_len); void pl_mpv_user_shader_destroy(const struct pl_hook **hook); PL_API_END #endif // LIBPLACEBO_SHADERS_CUSTOM_H_ libplacebo-v4.192.1/src/include/libplacebo/shaders/film_grain.h000066400000000000000000000122271417677245700244210ustar00rootroot00000000000000/* * This file is part of libplacebo, which is normally licensed under the terms * of the LGPL v2.1+.
However, this file (film_grain.h) is also available under * the terms of the more permissive MIT license: * * Copyright (c) 2018-2019 Niklas Haas * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef LIBPLACEBO_SHADERS_FILM_GRAIN_H_ #define LIBPLACEBO_SHADERS_FILM_GRAIN_H_ // Film grain synthesis shaders for AV1 / H.274. #include #include #include #include PL_API_BEGIN enum pl_film_grain_type { PL_FILM_GRAIN_NONE = 0, PL_FILM_GRAIN_AV1, PL_FILM_GRAIN_H274, PL_FILM_GRAIN_COUNT, }; // AV1 film grain parameters. For the exact meaning of these, see the AV1 // specification (section 6.8.20). struct pl_av1_grain_data { int num_points_y; uint8_t points_y[14][2]; // [n][0] = value, [n][1] = scaling bool chroma_scaling_from_luma; int num_points_uv[2]; // should be {0} for grayscale images uint8_t points_uv[2][10][2]; // like points_y for points_uv[0, 1] = u, v int scaling_shift; int ar_coeff_lag; int8_t ar_coeffs_y[24]; int8_t ar_coeffs_uv[2][25]; int ar_coeff_shift; int grain_scale_shift; int8_t uv_mult[2]; int8_t uv_mult_luma[2]; int16_t uv_offset[2]; // 9-bit value, range [-256, 255] bool overlap; }; // H.274 film grain parameters. For the exact meaning of these, see the H.274 // specification (section 8.5). struct pl_h274_grain_data { int model_id; int blending_mode_id; int log2_scale_factor; bool component_model_present[3]; uint16_t num_intensity_intervals[3]; uint8_t num_model_values[3]; const uint8_t *intensity_interval_lower_bound[3]; const uint8_t *intensity_interval_upper_bound[3]; const int16_t (*comp_model_value[3])[6]; }; // Tagged union for film grain data struct pl_film_grain_data { enum pl_film_grain_type type; // film grain type uint64_t seed; // shared seed value union { // Warning: These values are not sanity-checked at all. Invalid grain // data results in undefined behavior! struct pl_av1_grain_data av1; struct pl_h274_grain_data h274; } params; }; // Options for the `pl_shader_film_grain` call.
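//
// As a rough usage sketch (not a complete example), using the helpers
// declared further below: the film grain `data`, the plane texture
// `plane_tex`, its representation `plane_repr`, the shader `sh` and a
// NULL-initialized `pl_shader_obj grain_state` are all assumed to be
// provided by the caller:
//
//   struct pl_film_grain_params params = {
//       .data = data,
//       .tex = plane_tex,
//       .repr = &plane_repr,
//       .components = 3,
//       .component_mapping = {0, 1, 2},
//   };
//   if (pl_needs_film_grain(&params))
//       pl_shader_film_grain(sh, &grain_state, &params);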
struct pl_film_grain_params { // Required for all film grain types: struct pl_film_grain_data data; // film grain data pl_tex tex; // texture to sample from struct pl_color_repr *repr; // underlying color representation (see notes) int components; int component_mapping[4]; // same as `struct pl_plane` // Notes for `repr`: // - repr->bits affects the rounding for grain generation // - repr->levels affects whether or not we clip to full range or not // - repr->sys affects the interpretation of channels // - *repr gets normalized by this shader, which is why it's a pointer // Required for PL_FILM_GRAIN_AV1 only: pl_tex luma_tex; // "luma" texture (see notes) int luma_comp; // index of luma in `luma_tex` // Notes for `luma_tex`: // - `luma_tex` must be specified if the `tex` does not itself contain the // "luma-like" component. For XYZ systems, the Y channel is the luma // component. For RGB systems, the G channel is. }; #define pl_film_grain_params(...) (&(struct pl_film_grain_params) { __VA_ARGS__ }) // Test if film grain needs to be applied. This is a helper function that users // can use to decide whether or not `pl_shader_film_grain` needs to be called, // based on the given grain metadata. bool pl_needs_film_grain(const struct pl_film_grain_params *params); // Sample from a texture while applying film grain at the same time. // `grain_state` must be unique for every plane configuration, as it may // contain plane-dependent state. // // Returns false on any error, or if film grain generation is not supported // due to GLSL limitations. bool pl_shader_film_grain(pl_shader sh, pl_shader_obj *grain_state, const struct pl_film_grain_params *params); PL_API_END #endif // LIBPLACEBO_SHADERS_FILM_GRAIN_H_ libplacebo-v4.192.1/src/include/libplacebo/shaders/icc.h000066400000000000000000000133411417677245700230460ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_ICC_H_ #define LIBPLACEBO_SHADERS_ICC_H_ // Functions for generating and applying ICC-derived 3DLUTs #include #include PL_API_BEGIN // ICC profiles struct pl_icc_params { // The rendering intent to use when computing the color transformation. A // recommended value is PL_INTENT_RELATIVE_COLORIMETRIC for color-accurate // video reproduction, or PL_INTENT_PERCEPTUAL for profiles containing // meaningful perceptual mapping tables. enum pl_rendering_intent intent; // The size of the 3DLUT to generate. If left as 0, these individually // default to 64, which is the recommended default for all three. size_t size_r, size_g, size_b; // If true, the detected contrast (from the output ICC profile) will be // forwarded to the input of the 3DLUT as well. This will result in the // input curve being tuned to the given contrast, rather than the tagged // metadata (or 1000:1 by default for SDR curves). Has no effect when the // input color space is also an ICC profile.
bool use_display_contrast; }; #define PL_ICC_DEFAULTS \ .intent = PL_INTENT_RELATIVE_COLORIMETRIC, \ .size_r = 64, \ .size_g = 64, \ .size_b = 64, \ .use_display_contrast = true, #define pl_icc_params(...) (&(struct pl_icc_params) { PL_ICC_DEFAULTS __VA_ARGS__ }) extern const struct pl_icc_params pl_icc_default_params; struct pl_icc_color_space { // The nominal, closest approximation representation of the color profile, // as permitted by `pl_color_space` enums. This will be used as a fallback // in the event that an ICC profile is absent, or that parsing the ICC // profile fails. This is also what will be returned for the corresponding // field in `pl_icc_result` when the ICC profile is in use. struct pl_color_space color; // The ICC profile itself. (Optional) struct pl_icc_profile profile; }; struct pl_icc_result { // The source color space. This is the color space that the colors should // actually be in at the point in time that they're ingested by the 3DLUT. // This may differ from the `pl_color_space color` specified in the // `pl_icc_color_space`. Users should make sure to apply // `pl_shader_color_map` in order to get the colors into this format before // applying `pl_icc_apply`. // // Note: `pl_shader_color_map` is a no-op when the source and destination // color spaces are the same, so this can safely be used without disturbing // the colors in the event that an ICC profile is actually in use. struct pl_color_space src_color; // The destination color space. This is the color space that the colors // will (nominally) be in at the time they exit the 3DLUT. struct pl_color_space dst_color; }; // Updates/generates a 3DLUT based on ICC profiles. Returns success. If true, // `out` will be updated to a struct describing the color space chosen for the // input and output of the 3DLUT. (See `pl_icc_color_space`) If `params` is // NULL, it defaults to &pl_icc_default_params. // // Note: This function must always be called before `pl_icc_apply`, on the // same `pl_shader` object. The only reason it's separate from `pl_icc_apply` // is to give users a chance to adapt the input colors to the color space // chosen by the ICC profile before applying it. bool pl_icc_update(pl_shader sh, const struct pl_icc_color_space *src, const struct pl_icc_color_space *dst, pl_shader_obj *icc, struct pl_icc_result *out, const struct pl_icc_params *params); // Actually applies a 3DLUT as generated by `pl_icc_update`. The reason this is // separated from `pl_icc_update` is so that the user has the chance to // correctly map the colors into the specified `src_color` space. This should // be called only on the `pl_shader_obj` previously updated by `pl_icc_update`, // and only when that function returned true.
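//
// A rough sketch of the overall call order (error handling omitted; `sh`,
// the hypothetical `pl_icc_color_space` values `src_csp`/`dst_csp`, the
// current color space `cur_csp` and a NULL-initialized
// `pl_shader_obj icc_state` are assumed to be set up by the caller):
//
//   struct pl_icc_result res;
//   if (pl_icc_update(sh, &src_csp, &dst_csp, &icc_state, &res, NULL)) {
//       /* adapt colors to the color space chosen for the 3DLUT input */
//       pl_shader_color_map(sh, NULL, cur_csp, res.src_color, NULL, false);
//       pl_icc_apply(sh, &icc_state);
//   }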
void pl_icc_apply(pl_shader sh, pl_shader_obj *icc); // Backwards compatibility aliases #define pl_3dlut_params pl_icc_params #define pl_3dlut_default_params pl_icc_default_params #define pl_3dlut_profile pl_icc_color_space #define pl_3dlut_result pl_icc_result static PL_DEPRECATED inline bool pl_3dlut_update(pl_shader sh, const struct pl_icc_color_space *src, const struct pl_icc_color_space *dst, pl_shader_obj *lut3d, struct pl_icc_result *out, const struct pl_icc_params *params) { return pl_icc_update(sh, src, dst, lut3d, out, params); } static PL_DEPRECATED inline void pl_3dlut_apply(pl_shader sh, pl_shader_obj *lut3d) { return pl_icc_apply(sh, lut3d); } PL_API_END #endif // LIBPLACEBO_SHADERS_ICC_H_ libplacebo-v4.192.1/src/include/libplacebo/shaders/lut.h000066400000000000000000000061371417677245700231210ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_LUT_H_ #define LIBPLACEBO_SHADERS_LUT_H_ // Shaders for loading and applying arbitrary custom 1D/3DLUTs #include #include PL_API_BEGIN // Struct defining custom LUTs // // Note: Users may freely create their own instances of this struct, there is // nothing particularly special about `pl_lut_parse_cube`. struct pl_custom_lut { // Some unique signature identifying this LUT, needed to detect state // changes (for cache invalidation). This should ideally be a hash of the // file contents. (Which is what `pl_lut_parse_*` will set it to.) uint64_t signature; // Size of each dimension, in the order R, G, B. For 1D LUTs, only the R // dimension should be specified (the others left as 0). int size[3]; // Raw LUT data itself, in properly scaled floating point format. For 3D // LUTs, the innermost dimension is the first dimension (R), and the // outermost dimension is the last dimension (B). Individual color samples // are in the order R, G, B. const float *data; // Extra input/output shaper matrices. Ignored if equal to {0}. This is // mostly useful for 1D LUTs, since 3D LUTs can bake the shaper matrix into // the LUT itself - but it can still help optimize LUT precision. struct pl_matrix3x3 shaper_in, shaper_out; // Nominal metadata for the input/output of a LUT. Left as {0} if unknown. // Note: This is purely informative, `pl_shader_custom_lut` ignores it. struct pl_color_repr repr_in, repr_out; struct pl_color_space color_in, color_out; }; // Parse a 3DLUT in .cube format. Returns NULL if the file fails parsing. struct pl_custom_lut *pl_lut_parse_cube(pl_log log, const char *str, size_t str_len); // Frees a LUT created by `pl_lut_parse_*`. void pl_lut_free(struct pl_custom_lut **lut); // Apply a `pl_custom_lut`. The user is responsible for ensuring colors going // into the LUT are in the expected format as informed by the LUT metadata. // // `lut_state` must be a pointer to a NULL-initialized shader state object that // will be used to encapsulate any required GPU state. 
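//
// As a rough usage sketch (assuming `log`, `sh` and the contents of a .cube
// file in `str`/`str_len` are provided by the caller):
//
//   struct pl_custom_lut *lut = pl_lut_parse_cube(log, str, str_len);
//   pl_shader_obj lut_state = NULL;
//   if (lut)
//       pl_shader_custom_lut(sh, lut, &lut_state);
//   /* ... later: pl_shader_obj_destroy(&lut_state); pl_lut_free(&lut); */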
// // Note: `lut` does not have to be allocated by `pl_lut_parse_*`. It can be a // struct filled out by the user. void pl_shader_custom_lut(pl_shader sh, const struct pl_custom_lut *lut, pl_shader_obj *lut_state); PL_API_END #endif // LIBPLACEBO_SHADERS_LUT_H_ libplacebo-v4.192.1/src/include/libplacebo/shaders/sampling.h000066400000000000000000000225171417677245700241270ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_SAMPLING_H_ #define LIBPLACEBO_SHADERS_SAMPLING_H_ // Sampling operations. These shaders perform some form of sampling operation // from a given pl_tex. In order to use these, the pl_shader *must* have been // created using the same `gpu` as the originating `pl_tex`. Otherwise, this // is undefined behavior. They require nothing (PL_SHADER_SIG_NONE) and return // a color (PL_SHADER_SIG_COLOR). #include #include PL_API_BEGIN // Common parameters for sampling operations struct pl_sample_src { // There are two mutually exclusive ways of providing the source to sample // from: // // 1. Provide the texture and sampled region directly. This generates // a shader with input signature `PL_SHADER_SIG_NONE`, which binds the // texture as a descriptor (and the coordinates as a vertex attribute) pl_tex tex; // texture to sample struct pl_rect2df rect; // sub-rect to sample from (optional) enum pl_tex_address_mode address_mode; // preferred texture address mode // 2. Have the shader take it as an argument. Doing this requires // specifying the missing metadata of the texture backing the sampler, so // that the shader generation can generate the correct code. int tex_w, tex_h; // dimensions of the actual texture enum pl_fmt_type format; // format of the sampler being accepted enum pl_sampler_type sampler; // type of the sampler being accepted enum pl_tex_sample_mode mode; // sample mode of the sampler being accepted float sampled_w, sampled_h; // dimensions of the sampled region (optional) // Common metadata for both sampler input types: int components; // number of components to sample (optional) uint8_t component_mask; // bitmask of components to sample (optional) int new_w, new_h; // dimensions of the resulting output (optional) float scale; // factor to multiply into sampled signal (optional) // Note: `component_mask` and `components` are mutually exclusive, the // former is preferred if both are specified. }; #define pl_sample_src(...) (&(struct pl_sample_src) { __VA_ARGS__ }) struct pl_deband_params { // The number of debanding steps to perform per sample. Each step reduces a // bit more banding, but takes time to compute. Note that the strength of // each step falls off very quickly, so high numbers (>4) are practically // useless. Defaults to 1. int iterations; // The debanding filter's cut-off threshold. Higher numbers increase the // debanding strength dramatically, but progressively diminish image // details. 
Defaults to 4.0. float threshold; // The debanding filter's initial radius. The radius increases linearly // for each iteration. A higher radius will find more gradients, but a // lower radius will smooth more aggressively. Defaults to 16.0. float radius; // Add some extra noise to the image. This significantly helps cover up // remaining quantization artifacts. Higher numbers add more noise. // Note: When debanding HDR sources, even a small amount of grain can // result in a very big change to the brightness level. It's recommended to // either scale this value down or disable it entirely for HDR. // // Defaults to 6.0, which is very mild. float grain; }; #define PL_DEBAND_DEFAULTS \ .iterations = 1, \ .threshold = 4.0, \ .radius = 16.0, \ .grain = 6.0, #define pl_deband_params(...) (&(struct pl_deband_params) {PL_DEBAND_DEFAULTS __VA_ARGS__ }) extern const struct pl_deband_params pl_deband_default_params; // Debands a given texture and returns the sampled color in `vec4 color`. If // `params` is left as NULL, it defaults to &pl_deband_default_params. Note // that `tex->params.format` must have PL_FMT_CAP_LINEAR. When the given // `pl_sample_src` implies scaling, this effectively performs bilinear // sampling on the input (but not the output). // // Note: This can also be used as a pure grain function, by setting the number // of iterations to 0. void pl_shader_deband(pl_shader sh, const struct pl_sample_src *src, const struct pl_deband_params *params); // Performs direct / native texture sampling, using whatever texture filter is // available (linear for linearly sampleable sources, nearest otherwise). // // Note: This is generally very low quality and should be avoided if possible, // for both upscaling and downscaling. bool pl_shader_sample_direct(pl_shader sh, const struct pl_sample_src *src); // Performs hardware-accelerated nearest neighbour sampling. This is similar to // `pl_shader_sample_direct`, but forces nearest neighbour interpolation. bool pl_shader_sample_nearest(pl_shader sh, const struct pl_sample_src *src); // Performs hardware-accelerated bilinear sampling. This is similar to // `pl_shader_sample_direct`, but forces bilinear interpolation. bool pl_shader_sample_bilinear(pl_shader sh, const struct pl_sample_src *src); // Performs hardware-accelerated / efficient bicubic sampling. This is more // efficient than using the generalized sampling routines and // pl_filter_function_bicubic. Only works well when upscaling - avoid for // downscaling. bool pl_shader_sample_bicubic(pl_shader sh, const struct pl_sample_src *src); // A sampler that is similar to nearest neighbour sampling, but tries to // preserve pixel aspect ratios. This is mathematically equivalent to taking an // idealized image with square pixels, sampling it at an infinite resolution, // and then downscaling that to the desired resolution. (Hence it being called // "oversample"). Good for pixel art. // // The threshold provides a cutoff threshold below which the contribution of // pixels should be ignored, trading some amount of aspect ratio distortion for // a slightly crisper image. A value of `threshold == 0.5` makes this filter // equivalent to regular nearest neighbour sampling. bool pl_shader_sample_oversample(pl_shader sh, const struct pl_sample_src *src, float threshold); struct pl_sample_filter_params { // The filter to use for sampling. struct pl_filter_config filter; // The precision of the LUT. Defaults to 64 if unspecified. int lut_entries; // See `pl_filter_params.cutoff`. 
Defaults to 0.001 if unspecified. Only // relevant for polar filters. float cutoff; // Antiringing strength. A value of 0.0 disables antiringing, and a value // of 1.0 enables full-strength antiringing. Defaults to 0.0 if // unspecified. Only relevant for separated/orthogonal filters. float antiring; // Disable the use of compute shaders (e.g. if rendering to non-storable tex) bool no_compute; // Disable the use of filter widening / anti-aliasing (for downscaling) bool no_widening; // This shader object is used to store the LUT, and will be recreated // if necessary. To avoid thrashing the resource, users should avoid trying // to re-use the same LUT for different filter configurations or scaling // ratios. Must be set to a valid pointer, and the target NULL-initialized. pl_shader_obj *lut; }; #define pl_sample_filter_params(...) (&(struct pl_sample_filter_params) { __VA_ARGS__ }) // Performs polar sampling. This internally chooses between an optimized compute // shader, and various fragment shaders, depending on the supported GLSL version // and GPU features. Returns whether or not it was successful. // // Note: `params->filter.polar` must be true to use this function. bool pl_shader_sample_polar(pl_shader sh, const struct pl_sample_src *src, const struct pl_sample_filter_params *params); enum { PL_SEP_VERT = 0, PL_SEP_HORIZ, PL_SEP_PASSES }; // Performs orthogonal (1D) sampling. Using this twice in a row (once vertical // and once horizontal) effectively performs a 2D upscale. This is lower // quality than polar sampling, but significantly faster, and therefore the // recommended default. Returns whether or not it was successful. // // 0 <= pass < PL_SEP_PASSES indicates which component of the transformation to // apply. PL_SEP_VERT only applies the vertical component, and PL_SEP_HORIZ // only the horizontal. The non-relevant component of the `src->rect` is ignored // entirely. // // Note: Due to internal limitations, this may currently only be used on 2D // textures - even though the basic principle would work for 1D and 3D textures // as well. bool pl_shader_sample_ortho(pl_shader sh, int pass, const struct pl_sample_src *src, const struct pl_sample_filter_params *params); PL_API_END #endif // LIBPLACEBO_SHADERS_SAMPLING_H_ libplacebo-v4.192.1/src/include/libplacebo/swapchain.h000066400000000000000000000203231417677245700226320ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SWAPCHAIN_H_ #define LIBPLACEBO_SWAPCHAIN_H_ #include #include #include PL_API_BEGIN // This abstraction represents a low-level interface to visible surfaces // exposed by a graphics API (and accompanying GPU instance), allowing users to // directly present frames to the screen (or window, typically). This is a // sister API to gpu.h and follows the same convention w.r.t undefined behavior. 
// // Thread-safety: Safe typedef const PL_STRUCT(pl_swapchain) { pl_log log; pl_gpu gpu; // The underlying implementation struct pl_sw_fns *impl; // (Deprecated) Backwards compatibility field. Equal to `log`. pl_log ctx PL_DEPRECATED; } *pl_swapchain; // Destroys this swapchain. May be used at any time, and may block until the // completion of all outstanding rendering commands. The swapchain and any // resources retrieved from it must not be used afterwards. void pl_swapchain_destroy(pl_swapchain *sw); // Returns the approximate current swapchain latency in vsyncs, or 0 if // unknown. A latency of 1 means that `submit_frame` followed by `swap_buffers` // will block until the just-submitted frame has finished rendering. Typical // values are 2 or 3, which enable better pipelining by allowing the GPU to be // processing one or two frames at the same time as the user is preparing the // next for submission. int pl_swapchain_latency(pl_swapchain sw); // Update/query the swapchain size. This function performs both roles: it tries // setting the swapchain size to the values requested by the user, and returns // in the same variables what width/height the swapchain was actually set to - // which may be (substantially) different from the values requested by the // user. A value of 0 means "unknown/none" (in which case, libplacebo won't try // updating the size - it will simply return the current state of the // swapchain). It's also possible for libplacebo to return values of 0, such as // in the case that the swapchain doesn't exist yet. // // Returns false on significant errors (e.g. dead surface). This function can // effectively be used to probe if creating a swapchain works. bool pl_swapchain_resize(pl_swapchain sw, int *width, int *height); // Backwards compatibility #define pl_swapchain_colors pl_color_space // Inform the swapchain about the input color space. This API deliberately // provides no feedback, because the swapchain can internally decide what to do // with this information, including ignoring it entirely, or applying it // asynchronously. Users must still base their rendering on the value of // `pl_swapchain_frame.color_space`. // // Note: Calling this function a second time completely overrides any // previously specified hint. So calling this on {0} or NULL resets the // swapchain back to its initial/preferred colorspace. // // Note: If `csp->transfer` is a HDR transfer curve but HDR metadata is left // unspecified, the HDR metadata defaults to `pl_hdr_metadata_hdr10`. // Conversely, if the HDR metadata is non-empty but `csp->transfer` is left as // PL_COLOR_TRC_UNKNOWN, then it instead defaults to PL_COLOR_TRC_PQ. void pl_swapchain_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp); // Backwards compatibility wrapper for `pl_swapchain_colorspace_hint`. Always // returns `true`. (Deprecated) bool pl_swapchain_hdr_metadata(pl_swapchain sw, const struct pl_hdr_metadata *metadata) PL_DEPRECATED; // The struct used to hold the results of `pl_swapchain_start_frame` struct pl_swapchain_frame { // A texture representing the framebuffer users should use for rendering. // It's guaranteed that `fbo->params.renderable` and `fbo->params.blit_dst` // will be true, but no other guarantees are made - not even that // `fbo->params.format` is a real format. pl_tex fbo; // If true, the user should assume that this framebuffer will be flipped // as a result of presenting it on-screen. 
If false, nothing special needs // to be done - but if true, users should flip the coordinate system of // the `pl_pass` that is rendering to this framebuffer. // // Note: Normally, libplacebo follows the convention that (0,0) represents // the top left of the image/screen. So when flipped is true, this means // (0,0) on this framebuffer gets displayed as the bottom left of the image. bool flipped; // Indicates the color representation this framebuffer will be interpreted // as by the host system / compositor / display, including the bit depth // and alpha handling (where available). struct pl_color_repr color_repr; struct pl_color_space color_space; }; // Retrieve a new frame from the swapchain. Returns whether successful. It's // worth noting that this function can fail sporadically for benign reasons, // for example the window being invisible or inaccessible. This function may // block until an image is available, which may be the case if the GPU is // rendering frames significantly faster than the display can output them. It // may also be non-blocking, so users shouldn't rely on this call alone in // order to meter rendering speed. (Specifics depend on the underlying graphics // API) bool pl_swapchain_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame); // Submits the previously started frame. Non-blocking. This must be issued in // lockstep with pl_swapchain_start_frame - there is no way to start multiple // frames and submit them out-of-order. The frames submitted this way will // generally be made visible in a first-in first-out fashion, although // specifics depend on the mechanism used to create the pl_swapchain. (See the // platform-specific APIs for more info). // // Returns whether successful. This should normally never fail, unless the // GPU/surface has been lost or some other critical error has occurred. The // "started" frame is consumed even in the event of failure. // // Note that `start_frame` and `submit_frame` form a lock pair, i.e. trying to // call e.g. `pl_swapchain_resize` from another thread will block until // `pl_swapchain_submit_frame` is finished. bool pl_swapchain_submit_frame(pl_swapchain sw); // Performs a "buffer swap", or some generalization of the concept. In layman's // terms, this blocks until the execution of the Nth previously submitted frame // has been "made complete" in some sense. (The N derives from the swapchain's // built-in latency. See `pl_swapchain_latency` for more information). // // Users should include this call in their rendering loops in order to make // sure they aren't submitting rendering commands faster than the GPU can // process them, which would potentially lead to a queue overrun or exhaust // memory. // // An example loop might look like this: // // while (rendering) { // struct pl_swapchain_frame frame; // bool ok = pl_swapchain_start_frame(swapchain, &frame); // if (!ok) { // /* wait some time, or decide to stop rendering */ // continue; // } // // /* do some rendering with frame.fbo */ // // ok = pl_swapchain_submit_frame(swapchain); // if (!ok) // break; // // pl_swapchain_swap_buffers(swapchain); // } // // The duration this function blocks for, if at all, may be very inconsistent // and should not be used as an authoritative source of vsync timing // information without sufficient smoothing/filtering (and if so, the time that // `start_frame` blocked for should also be included). 
void pl_swapchain_swap_buffers(pl_swapchain sw); PL_API_END #endif // LIBPLACEBO_SWAPCHAIN_H_ libplacebo-v4.192.1/src/include/libplacebo/tone_mapping.h000066400000000000000000000213011417677245700233320ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_TONE_MAPPING_H_ #define LIBPLACEBO_TONE_MAPPING_H_ #include #include #include PL_API_BEGIN enum pl_hdr_scaling { PL_HDR_NORM = 0, // 0.0 is absolute black, 1.0 is PL_COLOR_SDR_WHITE PL_HDR_SQRT, // sqrt() of PL_HDR_NORM values PL_HDR_NITS, // absolute brightness in raw cd/m² PL_HDR_PQ, // absolute brightness in PQ (0.0 to 1.0) PL_HDR_SCALING_COUNT, }; // Generic helper for performing HDR scale conversions. float pl_hdr_rescale(enum pl_hdr_scaling from, enum pl_hdr_scaling to, float x); struct pl_tone_map_params; struct pl_tone_map_function { const char *name; // Identifier const char *description; // Friendly / longer name // If set, `pl_tone_map_params.param` can be adjusted to alter the // characteristics of the tone mapping function in some way. (Optional) const char *param_desc; // Name of parameter float param_min; float param_def; float param_max; // This controls the type of values input/output to/from `map` enum pl_hdr_scaling scaling; // The tone-mapping function itself. Iterates over all values in `lut`, and // adapts them as needed. // // Note that the `params` struct fed into this function is guaranteed to // satisfy `params->input_scaling == params->output_scaling == scaling`, // and also obeys `params->input_max >= params->output_max`. void (*map)(float *lut, const struct pl_tone_map_params *params); // Inverse tone mapping function. Optional. If absent, this tone mapping // curve only works in the forwards direction. // // For this function, `params->input_max <= params->output_max`. void (*map_inverse)(float *lut, const struct pl_tone_map_params *params); // Private data. Unused by libplacebo, but may be accessed by `map`. void *priv; }; struct pl_tone_map_params { // If `function` is NULL, defaults to `pl_tone_map_clip`. const struct pl_tone_map_function *function; float param; // or 0.0 for default // The desired input/output scaling of the tone map. If this differs from // `function->scaling`, any required conversion will be performed. // // Note that to maximize LUT efficiency, it's *highly* recommended to use // either PL_HDR_PQ or PL_HDR_SQRT as the input scaling, except when // using `pl_tone_map_sample`. enum pl_hdr_scaling input_scaling; enum pl_hdr_scaling output_scaling; // The size of the resulting LUT. (For `pl_tone_map_generate` only) size_t lut_size; // The characteristics of the input, in `input_scaling` units. float input_min; float input_max; // The desired characteristics of the output, in `output_scaling` units. float output_min; float output_max; }; #define pl_tone_map_params(...) 
(&(struct pl_tone_map_params) { __VA_ARGS__ }); // Note: Only does pointer equality testing on `function` bool pl_tone_map_params_equal(const struct pl_tone_map_params *a, const struct pl_tone_map_params *b); // Returns true if the given tone mapping configuration effectively represents // a no-op configuration. Tone mapping can be skipped in this case (although // strictly speaking, the LUT would still clip illegal input values) bool pl_tone_map_params_noop(const struct pl_tone_map_params *params); // Generate a tone-mapping LUT for a given configuration. This will always // span the entire input range, as given by `input_min` and `input_max`. void pl_tone_map_generate(float *out, const struct pl_tone_map_params *params); // Samples a tone mapping function at a single position. Note that this is less // efficient than `pl_tone_map_generate` for generating multiple values. // // Ignores `params->lut_size`. float pl_tone_map_sample(float x, const struct pl_tone_map_params *params); // Special tone mapping function that means "automatically pick a good function // based on the HDR levels". This is an opaque tone map function with no // meaningful internal representation. (Besides `name` and `description`) extern const struct pl_tone_map_function pl_tone_map_auto; // Performs no tone-mapping, just clips out-of-range colors. Retains perfect // color accuracy for in-range colors but completely destroys out-of-range // information. Does not perform any black point adaptation. extern const struct pl_tone_map_function pl_tone_map_clip; // EETF from the ITU-R Report BT.2390, a hermite spline roll-off with linear // segment. The knee point offset is configurable. Note that this defaults to // 1.0, rather than the value of 0.5 from the ITU-R spec. extern const struct pl_tone_map_function pl_tone_map_bt2390; // EETF from ITU-R Report BT.2446, method A. Can be used for both forward // and inverse tone mapping. Not configurable. extern const struct pl_tone_map_function pl_tone_map_bt2446a; // Simple spline consisting of two polynomials, joined by a single pivot point. // The parameter gives the pivot point (in PQ space), defaulting to 0.30. // Can be used for both forward and inverse tone mapping. extern const struct pl_tone_map_function pl_tone_map_spline; // Simple non-linear, global tone mapping algorithm. Named after Erik Reinhard. // The parameter specifies the local contrast coefficient at the display peak. // Essentially, a value of param=0.5 implies that the reference white will be // about half as bright as when clipping. Defaults to 0.5, which results in the // simplest formulation of this function. extern const struct pl_tone_map_function pl_tone_map_reinhard; // Generalization of the reinhard tone mapping algorithm to support an // additional linear slope near black. The tone mapping parameter indicates the // trade-off between the linear section and the non-linear section. // Essentially, for param=0.5, every color value below 0.5 will be mapped // linearly, with the higher values being non-linearly tone mapped. Values near // 1.0 make this curve behave like pl_tone_map_clip, and values near 0.0 make // this curve behave like pl_tone_map_reinhard. The default value is 0.3, which // provides a good balance between colorimetric accuracy and preserving // out-of-gamut details. The name is derived from its function shape // (ax+b)/(cx+d), which is known as a Möbius transformation in mathematics. 
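//
// As an illustration only (a hedged sketch based on the declarations in this
// header; the luminance values are arbitrary), sampling this curve to map
// 1000 nits content onto a 500 nits display could look like:
//
//     struct pl_tone_map_params params = {
//         .function       = &pl_tone_map_mobius,
//         .input_scaling  = PL_HDR_NITS,
//         .output_scaling = PL_HDR_NITS,
//         .input_min      = 0.005,
//         .input_max      = 1000.0,
//         .output_min     = 0.005,
//         .output_max     = 500.0,
//     };
//     float mapped = pl_tone_map_sample(800.0f, &params); // result in nits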
extern const struct pl_tone_map_function pl_tone_map_mobius; // Piece-wise, filmic tone-mapping algorithm developed by John Hable for use in // Uncharted 2, inspired by a similar tone-mapping algorithm used by Kodak. // Popularized by its use in video games with HDR rendering. Preserves both // dark and bright details very well, but comes with the drawback of changing // the average brightness quite significantly. This is sort of similar to // pl_tone_map_reinhard with parameter 0.24. extern const struct pl_tone_map_function pl_tone_map_hable; // Fits a gamma (power) function to transfer between the source and target // color spaces, effectively resulting in a perceptual hard-knee joining two // roughly linear sections. This preserves details at all scales fairly // accurately, but can result in an image with a muted or dull appearance. The // parameter is used as the cutoff point, defaulting to 0.5. extern const struct pl_tone_map_function pl_tone_map_gamma; // Linearly stretches the input range to the output range, in PQ space. This // will preserve all details accurately, but results in a significantly // different average brightness. Can be used for inverse tone-mapping in // addition to regular tone-mapping. The parameter can be used as an additional // linear gain coefficient (defaulting to 1.0). extern const struct pl_tone_map_function pl_tone_map_linear; // A list of built-in tone mapping functions, terminated by NULL extern const struct pl_tone_map_function * const pl_tone_map_functions[]; extern const int pl_num_tone_map_functions; // excluding trailing NULL // Find the tone mapping function with the given name, or NULL on failure. const struct pl_tone_map_function *pl_find_tone_map_function(const char *name); PL_API_END #endif // LIBPLACEBO_TONE_MAPPING_H_ libplacebo-v4.192.1/src/include/libplacebo/utils/000077500000000000000000000000001417677245700216445ustar00rootroot00000000000000libplacebo-v4.192.1/src/include/libplacebo/utils/dav1d.h000066400000000000000000000127011417677245700230150ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DAV1D_H_ #define LIBPLACEBO_DAV1D_H_ #include #include #include PL_API_BEGIN // Fill in the details of a `pl_frame` from a Dav1dPicture. This function will // explicitly clear `out_frame`, setting all extra fields to 0. After this // function returns, the only missing data is information related to the plane // texture itself (`planes[N].texture`). // // Note: This will include all possible metadata, including HDR metadata and // AV1 film grain data. Users should explicitly clear this out if undesired. static void pl_frame_from_dav1dpicture(struct pl_frame *out_frame, const Dav1dPicture *picture); // Helper function to generate a `pl_swapchain_colors` struct from a Dav1dPicture. // Useful to update the swapchain colorspace mode dynamically (e.g. for HDR). 
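// For example (a minimal sketch; `swapchain` and `picture` are assumed to
// already exist):
//
//     struct pl_swapchain_colors csp;
//     pl_swapchain_colors_from_dav1dpicture(&csp, &picture);
//     pl_swapchain_colorspace_hint(swapchain, &csp);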
static void pl_swapchain_colors_from_dav1dpicture(struct pl_swapchain_colors *out_colors, const Dav1dPicture *picture); struct pl_dav1d_upload_params { // The picture to upload. Not modified unless `asynchronous` is true. Dav1dPicture *picture; // If true, film grain present in `picture` will be exported to the // `pl_frame` as well. This should be set to false unless the user has // disabled `Dav1dSettings.apply_grain`. bool film_grain; // If true, libplacebo will probe for the allocation metadata set by // `pl_allocate_dav1dpicture`, and directly import the attached buffers // (saving a memcpy in some cases). Has no effect if the Dav1dPicture was // not allocated using `pl_allocate_dav1dpicture`. // // Note: When this is the case, `asynchronous` has no further effect - // uploads from attached buffers are already asynchronous. bool gpu_allocated; // If true, `picture` will be asynchronously uploaded and unref'd // internally by libplacebo, and the struct passed by the user cleared to // {0}. This is needed to avoid `memcpy` in some cases, so setting it to // true is highly recommended wherever possible. // // Note: If `pl_upload_dav1dpicture` returns false, `picture` does not get // unref'd. bool asynchronous; }; #define pl_dav1d_upload_params(...) (&(struct pl_dav1d_upload_params) { __VA_ARGS__ }) // Very high level helper function to take a `Dav1dPicture` and upload it to // the GPU. Similar in spirit to `pl_upload_plane`, and the same notes apply. // `tex` must be an array of 3 pointers of type `pl_tex`, each // either pointing to a valid texture, or NULL. Returns whether successful. static bool pl_upload_dav1dpicture(pl_gpu gpu, struct pl_frame *out_frame, pl_tex tex[3], const struct pl_dav1d_upload_params *params); // Allocate a Dav1dPicture from persistently mapped buffers. This can be more // efficient than regular Dav1dPictures, especially when using the synchronous // `pl_upload_dav1dpicture`, or on platforms that don't support importing // PL_HANDLE_HOST_PTR as buffers. Returns 0 or a negative DAV1D_ERR value. // // Note: These may only be used directly as a Dav1dPicAllocator if the `gpu` // passed as the value of `cookie` is `pl_gpu.limits.thread_safe`. Otherwise, // the user must manually synchronize this to ensure it runs on the correct // thread. static int pl_allocate_dav1dpicture(Dav1dPicture *picture, void *gpu); static void pl_release_dav1dpicture(Dav1dPicture *picture, void *gpu); // Mapping functions for the various Dav1dColor* enums. Note that these are not // quite 1:1, and even for values that exist in both, the semantics sometimes // differ. Some special cases (e.g. ICtCp, or XYZ) are handled differently in // libplacebo and libdav1d, respectively. 
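//
// For instance (a hedged sketch), a color representation can be tagged by
// hand from a Dav1dSequenceHeader, mirroring what
// `pl_frame_from_dav1dpicture` already does internally:
//
//     struct pl_color_repr repr = {
//         .sys    = pl_system_from_dav1d(seq_hdr->mtrx),
//         .levels = pl_levels_from_dav1d(seq_hdr->color_range),
//     };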
static enum pl_color_system pl_system_from_dav1d(enum Dav1dMatrixCoefficients mc); static enum Dav1dMatrixCoefficients pl_system_to_dav1d(enum pl_color_system sys); static enum pl_color_levels pl_levels_from_dav1d(int color_range); static int pl_levels_to_dav1d(enum pl_color_levels levels); static enum pl_color_primaries pl_primaries_from_dav1d(enum Dav1dColorPrimaries prim); static enum Dav1dColorPrimaries pl_primaries_to_dav1d(enum pl_color_primaries prim); static enum pl_color_transfer pl_transfer_from_dav1d(enum Dav1dTransferCharacteristics trc); static enum Dav1dTransferCharacteristics pl_transfer_to_dav1d(enum pl_color_transfer trc); static enum pl_chroma_location pl_chroma_from_dav1d(enum Dav1dChromaSamplePosition loc); static enum Dav1dChromaSamplePosition pl_chroma_to_dav1d(enum pl_chroma_location loc); // Actual implementation, included as part of this header to avoid having // a compile-time dependency on libdav1d. #include PL_API_END #endif // LIBPLACEBO_DAV1D_H_ libplacebo-v4.192.1/src/include/libplacebo/utils/dav1d_internal.h000066400000000000000000000552341417677245700247210ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DAV1D_H_ #error This header should be included as part of #else #include #include #include static inline enum pl_color_system pl_system_from_dav1d(enum Dav1dMatrixCoefficients mc) { switch (mc) { case DAV1D_MC_IDENTITY: return PL_COLOR_SYSTEM_RGB; // or XYZ (unlikely) case DAV1D_MC_BT709: return PL_COLOR_SYSTEM_BT_709; case DAV1D_MC_UNKNOWN: return PL_COLOR_SYSTEM_UNKNOWN; case DAV1D_MC_FCC: return PL_COLOR_SYSTEM_UNKNOWN; // missing case DAV1D_MC_BT470BG: return PL_COLOR_SYSTEM_BT_601; case DAV1D_MC_BT601: return PL_COLOR_SYSTEM_BT_601; case DAV1D_MC_SMPTE240: return PL_COLOR_SYSTEM_SMPTE_240M; case DAV1D_MC_SMPTE_YCGCO: return PL_COLOR_SYSTEM_YCGCO; case DAV1D_MC_BT2020_NCL: return PL_COLOR_SYSTEM_BT_2020_NC; case DAV1D_MC_BT2020_CL: return PL_COLOR_SYSTEM_BT_2020_C; case DAV1D_MC_SMPTE2085: return PL_COLOR_SYSTEM_UNKNOWN; // missing case DAV1D_MC_CHROMAT_NCL: return PL_COLOR_SYSTEM_UNKNOWN; // missing case DAV1D_MC_CHROMAT_CL: return PL_COLOR_SYSTEM_UNKNOWN; // missing // Note: this colorspace is confused between PQ and HLG, which dav1d // requires inferring from other sources, but libplacebo makes // explicit. Default to PQ as it's the more common scenario. 
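    // (pl_frame_from_dav1dpicture corrects this to PL_COLOR_SYSTEM_BT_2100_HLG
    // when the sequence header also signals an HLG transfer curve.)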
case DAV1D_MC_ICTCP: return PL_COLOR_SYSTEM_BT_2100_PQ; case DAV1D_MC_RESERVED: abort(); } return PL_COLOR_SYSTEM_UNKNOWN; } static inline enum Dav1dMatrixCoefficients pl_system_to_dav1d(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: return DAV1D_MC_UNKNOWN; case PL_COLOR_SYSTEM_BT_601: return DAV1D_MC_BT601; case PL_COLOR_SYSTEM_BT_709: return DAV1D_MC_BT709; case PL_COLOR_SYSTEM_SMPTE_240M: return DAV1D_MC_SMPTE240; case PL_COLOR_SYSTEM_BT_2020_NC: return DAV1D_MC_BT2020_NCL; case PL_COLOR_SYSTEM_BT_2020_C: return DAV1D_MC_BT2020_CL; case PL_COLOR_SYSTEM_BT_2100_PQ: return DAV1D_MC_ICTCP; case PL_COLOR_SYSTEM_BT_2100_HLG: return DAV1D_MC_ICTCP; case PL_COLOR_SYSTEM_DOLBYVISION: return DAV1D_MC_UNKNOWN; // missing case PL_COLOR_SYSTEM_YCGCO: return DAV1D_MC_SMPTE_YCGCO; case PL_COLOR_SYSTEM_RGB: return DAV1D_MC_IDENTITY; case PL_COLOR_SYSTEM_XYZ: return DAV1D_MC_IDENTITY; case PL_COLOR_SYSTEM_COUNT: abort(); } return DAV1D_MC_UNKNOWN; } static inline enum pl_color_levels pl_levels_from_dav1d(int color_range) { return color_range ? PL_COLOR_LEVELS_FULL : PL_COLOR_LEVELS_LIMITED; } static inline int pl_levels_to_dav1d(enum pl_color_levels levels) { return levels == PL_COLOR_LEVELS_FULL; } static inline enum pl_color_primaries pl_primaries_from_dav1d(enum Dav1dColorPrimaries prim) { switch (prim) { case DAV1D_COLOR_PRI_BT709: return PL_COLOR_PRIM_BT_709; case DAV1D_COLOR_PRI_UNKNOWN: return PL_COLOR_PRIM_UNKNOWN; case DAV1D_COLOR_PRI_RESERVED: return PL_COLOR_PRIM_UNKNOWN; case DAV1D_COLOR_PRI_BT470M: return PL_COLOR_PRIM_BT_470M; case DAV1D_COLOR_PRI_BT470BG: return PL_COLOR_PRIM_BT_601_625; case DAV1D_COLOR_PRI_BT601: return PL_COLOR_PRIM_BT_601_525; case DAV1D_COLOR_PRI_SMPTE240: return PL_COLOR_PRIM_BT_601_525; case DAV1D_COLOR_PRI_FILM: return PL_COLOR_PRIM_FILM_C; case DAV1D_COLOR_PRI_BT2020: return PL_COLOR_PRIM_BT_2020; case DAV1D_COLOR_PRI_XYZ: return PL_COLOR_PRIM_CIE_1931; case DAV1D_COLOR_PRI_SMPTE431: return PL_COLOR_PRIM_DCI_P3; case DAV1D_COLOR_PRI_SMPTE432: return PL_COLOR_PRIM_DISPLAY_P3; case DAV1D_COLOR_PRI_EBU3213: return PL_COLOR_PRIM_EBU_3213; } return PL_COLOR_PRIM_UNKNOWN; } static inline enum Dav1dColorPrimaries pl_primaries_to_dav1d(enum pl_color_primaries prim) { switch (prim) { case PL_COLOR_PRIM_UNKNOWN: return DAV1D_COLOR_PRI_UNKNOWN; case PL_COLOR_PRIM_BT_601_525: return DAV1D_COLOR_PRI_BT601; case PL_COLOR_PRIM_BT_601_625: return DAV1D_COLOR_PRI_BT470BG; case PL_COLOR_PRIM_BT_709: return DAV1D_COLOR_PRI_BT709; case PL_COLOR_PRIM_BT_470M: return DAV1D_COLOR_PRI_BT470M; case PL_COLOR_PRIM_EBU_3213: return DAV1D_COLOR_PRI_EBU3213; case PL_COLOR_PRIM_BT_2020: return DAV1D_COLOR_PRI_BT2020; case PL_COLOR_PRIM_APPLE: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_ADOBE: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_PRO_PHOTO: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_CIE_1931: return DAV1D_COLOR_PRI_XYZ; case PL_COLOR_PRIM_DCI_P3: return DAV1D_COLOR_PRI_SMPTE431; case PL_COLOR_PRIM_DISPLAY_P3: return DAV1D_COLOR_PRI_SMPTE432; case PL_COLOR_PRIM_V_GAMUT: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_S_GAMUT: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_FILM_C: return DAV1D_COLOR_PRI_FILM; case PL_COLOR_PRIM_COUNT: abort(); } return DAV1D_COLOR_PRI_UNKNOWN; } static inline enum pl_color_transfer pl_transfer_from_dav1d(enum Dav1dTransferCharacteristics trc) { switch (trc) { case DAV1D_TRC_BT709: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case 
DAV1D_TRC_UNKNOWN: return PL_COLOR_TRC_UNKNOWN; case DAV1D_TRC_BT470M: return PL_COLOR_TRC_GAMMA22; case DAV1D_TRC_BT470BG: return PL_COLOR_TRC_GAMMA28; case DAV1D_TRC_BT601: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_SMPTE240: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_LINEAR: return PL_COLOR_TRC_LINEAR; case DAV1D_TRC_LOG100: return PL_COLOR_TRC_UNKNOWN; // missing case DAV1D_TRC_LOG100_SQRT10: return PL_COLOR_TRC_UNKNOWN; // missing case DAV1D_TRC_IEC61966: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_BT1361: return PL_COLOR_TRC_BT_1886; // ETOF != OETF case DAV1D_TRC_SRGB: return PL_COLOR_TRC_SRGB; case DAV1D_TRC_BT2020_10BIT: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_BT2020_12BIT: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_SMPTE2084: return PL_COLOR_TRC_PQ; case DAV1D_TRC_SMPTE428: return PL_COLOR_TRC_UNKNOWN; // missing case DAV1D_TRC_HLG: return PL_COLOR_TRC_HLG; case DAV1D_TRC_RESERVED: abort(); } return PL_COLOR_TRC_UNKNOWN; } static inline enum Dav1dTransferCharacteristics pl_transfer_to_dav1d(enum pl_color_transfer trc) { switch (trc) { case PL_COLOR_TRC_UNKNOWN: return DAV1D_TRC_UNKNOWN; case PL_COLOR_TRC_BT_1886: return DAV1D_TRC_BT709; // EOTF != OETF case PL_COLOR_TRC_SRGB: return DAV1D_TRC_SRGB; case PL_COLOR_TRC_LINEAR: return DAV1D_TRC_LINEAR; case PL_COLOR_TRC_GAMMA18: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_GAMMA20: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_GAMMA22: return DAV1D_TRC_BT470M; case PL_COLOR_TRC_GAMMA24: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_GAMMA26: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_GAMMA28: return DAV1D_TRC_BT470BG; case PL_COLOR_TRC_PRO_PHOTO: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_PQ: return DAV1D_TRC_SMPTE2084; case PL_COLOR_TRC_HLG: return DAV1D_TRC_HLG; case PL_COLOR_TRC_V_LOG: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_S_LOG1: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_S_LOG2: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_COUNT: abort(); } return DAV1D_TRC_UNKNOWN; } static inline enum pl_chroma_location pl_chroma_from_dav1d(enum Dav1dChromaSamplePosition loc) { switch (loc) { case DAV1D_CHR_UNKNOWN: return PL_CHROMA_UNKNOWN; case DAV1D_CHR_VERTICAL: return PL_CHROMA_LEFT; case DAV1D_CHR_COLOCATED: return PL_CHROMA_TOP_LEFT; } return PL_CHROMA_UNKNOWN; } static inline enum Dav1dChromaSamplePosition pl_chroma_to_dav1d(enum pl_chroma_location loc) { switch (loc) { case PL_CHROMA_UNKNOWN: return DAV1D_CHR_UNKNOWN; case PL_CHROMA_LEFT: return DAV1D_CHR_VERTICAL; case PL_CHROMA_CENTER: return DAV1D_CHR_UNKNOWN; // missing case PL_CHROMA_TOP_LEFT: return DAV1D_CHR_COLOCATED; case PL_CHROMA_TOP_CENTER: return DAV1D_CHR_UNKNOWN; // missing case PL_CHROMA_BOTTOM_LEFT: return DAV1D_CHR_UNKNOWN; // missing case PL_CHROMA_BOTTOM_CENTER: return DAV1D_CHR_UNKNOWN; // missing case PL_CHROMA_COUNT: abort(); } return DAV1D_CHR_UNKNOWN; } static inline float pl_fixed24_8(uint32_t n) { return (float) n / (1 << 8); } static inline float pl_fixed18_14(uint32_t n) { return (float) n / (1 << 14); } static inline float pl_fixed0_16(uint16_t n) { return (float) n / (1 << 16); } // Align to a power of 2 #define PL_ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) static inline void pl_frame_from_dav1dpicture(struct pl_frame *out, const Dav1dPicture *picture) { const Dav1dSequenceHeader *seq_hdr = picture->seq_hdr; int num_planes; switch (picture->p.layout) { case 
DAV1D_PIXEL_LAYOUT_I400: num_planes = 1; break; case DAV1D_PIXEL_LAYOUT_I420: case DAV1D_PIXEL_LAYOUT_I422: case DAV1D_PIXEL_LAYOUT_I444: num_planes = 3; break; default: abort(); } *out = (struct pl_frame) { .num_planes = num_planes, .planes = { // Components are always in order, which makes things easy { .components = 1, .component_mapping = {0}, }, { .components = 1, .component_mapping = {1}, }, { .components = 1, .component_mapping = {2}, }, }, .crop = { 0, 0, picture->p.w, picture->p.h, }, .color = { .primaries = pl_primaries_from_dav1d(seq_hdr->pri), .transfer = pl_transfer_from_dav1d(seq_hdr->trc), }, .repr = { .sys = pl_system_from_dav1d(seq_hdr->mtrx), .levels = pl_levels_from_dav1d(seq_hdr->color_range), .bits = { .sample_depth = PL_ALIGN2(picture->p.bpc, 8), .color_depth = picture->p.bpc, }, }, }; if (seq_hdr->mtrx == DAV1D_MC_ICTCP && seq_hdr->trc == DAV1D_TRC_HLG) { // dav1d makes no distinction between PQ and HLG ICtCp, so we need // to manually fix it in the case that we have HLG ICtCp data. out->repr.sys = PL_COLOR_SYSTEM_BT_2100_HLG; } else if (seq_hdr->mtrx == DAV1D_MC_IDENTITY && seq_hdr->pri == DAV1D_COLOR_PRI_XYZ) { // dav1d handles this as a special case, but doesn't provide an // explicit flag for it either, so we have to resort to this ugly hack, // even though CIE 1931 RGB *is* a valid thing in principle! out->repr.sys= PL_COLOR_SYSTEM_XYZ; } else if (!out->repr.sys) { // PL_COLOR_SYSTEM_UNKNOWN maps to RGB, so hard-code this one out->repr.sys = pl_color_system_guess_ycbcr(picture->p.w, picture->p.h); } const Dav1dContentLightLevel *cll = picture->content_light; if (cll) { out->color.hdr.max_cll = cll->max_content_light_level; out->color.hdr.max_fall = cll->max_frame_average_light_level; } // This overrides the CLL values above, if both are present const Dav1dMasteringDisplay *md = picture->mastering_display; if (md) { out->color.hdr.max_luma = pl_fixed24_8(md->max_luminance); out->color.hdr.min_luma = pl_fixed18_14(md->min_luminance); out->color.hdr.prim = (struct pl_raw_primaries) { .red.x = pl_fixed0_16(md->primaries[0][0]), .red.y = pl_fixed0_16(md->primaries[0][1]), .green.x = pl_fixed0_16(md->primaries[1][0]), .green.y = pl_fixed0_16(md->primaries[1][1]), .blue.x = pl_fixed0_16(md->primaries[2][0]), .blue.y = pl_fixed0_16(md->primaries[2][1]), .white.x = pl_fixed0_16(md->white_point[0]), .white.y = pl_fixed0_16(md->white_point[1]), }; } if (picture->frame_hdr->film_grain.present) { const Dav1dFilmGrainData *fg = &picture->frame_hdr->film_grain.data; out->film_grain = (struct pl_film_grain_data) { .type = PL_FILM_GRAIN_AV1, .seed = fg->seed, .params.av1 = { .num_points_y = fg->num_y_points, .chroma_scaling_from_luma = fg->chroma_scaling_from_luma, .num_points_uv = { fg->num_uv_points[0], fg->num_uv_points[1] }, .scaling_shift = fg->scaling_shift, .ar_coeff_lag = fg->ar_coeff_lag, .ar_coeff_shift = (int) fg->ar_coeff_shift, .grain_scale_shift = fg->grain_scale_shift, .uv_mult = { fg->uv_mult[0], fg->uv_mult[1] }, .uv_mult_luma = { fg->uv_luma_mult[0], fg->uv_luma_mult[1] }, .uv_offset = { fg->uv_offset[0], fg->uv_offset[1] }, .overlap = fg->overlap_flag, }, }; struct pl_av1_grain_data *av1 = &out->film_grain.params.av1; memcpy(av1->points_y, fg->y_points, sizeof(av1->points_y)); memcpy(av1->points_uv, fg->uv_points, sizeof(av1->points_uv)); memcpy(av1->ar_coeffs_y, fg->ar_coeffs_y, sizeof(av1->ar_coeffs_y)); memcpy(av1->ar_coeffs_uv[0], fg->ar_coeffs_uv[0], sizeof(av1->ar_coeffs_uv[0])); memcpy(av1->ar_coeffs_uv[1], fg->ar_coeffs_uv[1], 
sizeof(av1->ar_coeffs_uv[1])); } switch (picture->p.layout) { case DAV1D_PIXEL_LAYOUT_I400: case DAV1D_PIXEL_LAYOUT_I444: break; case DAV1D_PIXEL_LAYOUT_I420: case DAV1D_PIXEL_LAYOUT_I422: // Only set the chroma location for definitely subsampled images pl_frame_set_chroma_location(out, pl_chroma_from_dav1d(seq_hdr->chr)); break; } } static inline void pl_swapchain_colors_from_dav1dpicture(struct pl_swapchain_colors *out_colors, const Dav1dPicture *picture) { struct pl_frame frame; pl_frame_from_dav1dpicture(&frame, picture); *out_colors = (struct pl_swapchain_colors) { .primaries = frame.color.primaries, .transfer = frame.color.transfer, }; const Dav1dContentLightLevel *cll = picture->content_light; if (cll) { out_colors->hdr.max_cll = cll->max_content_light_level; out_colors->hdr.max_fall = cll->max_frame_average_light_level; } const Dav1dMasteringDisplay *md = picture->mastering_display; if (md) { out_colors->hdr.min_luma = pl_fixed18_14(md->min_luminance); out_colors->hdr.max_luma = pl_fixed24_8(md->max_luminance); out_colors->hdr.prim.red.x = pl_fixed0_16(md->primaries[0][0]); out_colors->hdr.prim.red.y = pl_fixed0_16(md->primaries[0][1]); out_colors->hdr.prim.green.x = pl_fixed0_16(md->primaries[1][0]); out_colors->hdr.prim.green.y = pl_fixed0_16(md->primaries[1][1]); out_colors->hdr.prim.blue.x = pl_fixed0_16(md->primaries[2][0]); out_colors->hdr.prim.blue.y = pl_fixed0_16(md->primaries[2][1]); out_colors->hdr.prim.white.x = pl_fixed0_16(md->white_point[0]); out_colors->hdr.prim.white.y = pl_fixed0_16(md->white_point[1]); } } #define PL_MAGIC0 0x2c2a1269 #define PL_MAGIC1 0xc6d02577 struct pl_dav1dalloc { uint32_t magic[2]; pl_gpu gpu; pl_buf buf; }; struct pl_dav1dref { Dav1dPicture pic; uint8_t count; }; static void pl_dav1dpicture_unref(void *priv) { struct pl_dav1dref *ref = priv; if (--ref->count == 0) { dav1d_picture_unref(&ref->pic); free(ref); } } static inline bool pl_upload_dav1dpicture(pl_gpu gpu, struct pl_frame *out, pl_tex tex[3], const struct pl_dav1d_upload_params *params) { Dav1dPicture *pic = params->picture; pl_frame_from_dav1dpicture(out, pic); if (!params->film_grain) out->film_grain.type = PL_FILM_GRAIN_NONE; const int bytes = (pic->p.bpc + 7) / 8; // rounded up int sub_x = 0, sub_y = 0; switch (pic->p.layout) { case DAV1D_PIXEL_LAYOUT_I400: case DAV1D_PIXEL_LAYOUT_I444: break; case DAV1D_PIXEL_LAYOUT_I420: sub_x = sub_y = 1; break; case DAV1D_PIXEL_LAYOUT_I422: sub_x = 1; break; } struct pl_plane_data data[3] = { { // Y plane .type = PL_FMT_UNORM, .width = pic->p.w, .height = pic->p.h, .pixel_stride = bytes, .row_stride = pic->stride[0], .component_size = {bytes * 8}, .component_map = {0}, }, { // U plane .type = PL_FMT_UNORM, .width = pic->p.w >> sub_x, .height = pic->p.h >> sub_y, .pixel_stride = bytes, .row_stride = pic->stride[1], .component_size = {bytes * 8}, .component_map = {1}, }, { // V plane .type = PL_FMT_UNORM, .width = pic->p.w >> sub_x, .height = pic->p.h >> sub_y, .pixel_stride = bytes, .row_stride = pic->stride[1], .component_size = {bytes * 8}, .component_map = {2}, }, }; pl_buf buf = NULL; struct pl_dav1dalloc *alloc = params->gpu_allocated ? 
pic->allocator_data : NULL; struct pl_dav1dref *ref = NULL; if (alloc && alloc->magic[0] == PL_MAGIC0 && alloc->magic[1] == PL_MAGIC1) { // Re-use pre-allocated buffers directly assert(alloc->gpu == gpu); buf = alloc->buf; } else if (params->asynchronous && gpu->limits.callbacks) { ref = malloc(sizeof(*ref)); if (!ref) return false; memcpy(&ref->pic, pic, sizeof(Dav1dPicture)); ref->count = out->num_planes; } for (int p = 0; p < out->num_planes; p++) { if (buf) { data[p].buf = buf; data[p].buf_offset = (uintptr_t) pic->data[p] - (uintptr_t) buf->data; } else { data[p].pixels = pic->data[p]; if (ref) { data[p].priv = ref; data[p].callback = pl_dav1dpicture_unref; } } if (!pl_upload_plane(gpu, &out->planes[p], &tex[p], &data[p])) { free(ref); return false; } } if (params->asynchronous) { if (ref) { *pic = (Dav1dPicture) {0}; } else { dav1d_picture_unref(pic); } } return true; } static inline int pl_allocate_dav1dpicture(Dav1dPicture *p, void *cookie) { pl_gpu gpu = cookie; if (!gpu->limits.max_mapped_size || !gpu->limits.buf_transfer) return DAV1D_ERR(ENOTSUP); // Copied from dav1d_default_picture_alloc const int hbd = p->p.bpc > 8; const int aligned_w = PL_ALIGN2(p->p.w, 128); const int aligned_h = PL_ALIGN2(p->p.h, 128); const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400; const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420; const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444; p->stride[0] = aligned_w << hbd; p->stride[1] = has_chroma ? (aligned_w >> ss_hor) << hbd : 0; // Align strides up to multiples of the GPU performance hints p->stride[0] = PL_ALIGN2(p->stride[0], gpu->limits.align_tex_xfer_pitch); p->stride[1] = PL_ALIGN2(p->stride[1], gpu->limits.align_tex_xfer_pitch); // Aligning offsets to 4 also implicitly aligns to the texel alignment (1 or 2) size_t off_align = PL_ALIGN2(gpu->limits.align_tex_xfer_offset, 4); const size_t y_sz = PL_ALIGN2(p->stride[0] * aligned_h, off_align); const size_t uv_sz = PL_ALIGN2(p->stride[1] * (aligned_h >> ss_ver), off_align); // The extra DAV1D_PICTURE_ALIGNMENTs are to brute force plane alignment, // even in the case that the driver gives us insane alignments const size_t pic_size = y_sz + 2 * uv_sz; const size_t total_size = pic_size + DAV1D_PICTURE_ALIGNMENT * 4; // Validate size limitations if (total_size > gpu->limits.max_mapped_size) return DAV1D_ERR(ENOMEM); pl_buf buf = pl_buf_create(gpu, pl_buf_params( .size = total_size, .host_mapped = true, .memory_type = PL_BUF_MEM_HOST, )); if (!buf) return DAV1D_ERR(ENOMEM); struct pl_dav1dalloc *alloc = malloc(sizeof(struct pl_dav1dalloc)); if (!alloc) { pl_buf_destroy(gpu, &buf); return DAV1D_ERR(ENOMEM); } *alloc = (struct pl_dav1dalloc) { .magic = { PL_MAGIC0, PL_MAGIC1 }, .gpu = gpu, .buf = buf, }; assert(buf->data); uintptr_t base = (uintptr_t) buf->data, data[3]; data[0] = PL_ALIGN2(base, DAV1D_PICTURE_ALIGNMENT); data[1] = PL_ALIGN2(data[0] + y_sz, DAV1D_PICTURE_ALIGNMENT); data[2] = PL_ALIGN2(data[1] + uv_sz, DAV1D_PICTURE_ALIGNMENT); p->allocator_data = alloc; p->data[0] = (void *) data[0]; p->data[1] = (void *) data[1]; p->data[2] = (void *) data[2]; return 0; } static inline void pl_release_dav1dpicture(Dav1dPicture *p, void *cookie) { struct pl_dav1dalloc *alloc = p->allocator_data; if (!alloc) return; assert(alloc->magic[0] == PL_MAGIC0); assert(alloc->magic[1] == PL_MAGIC1); assert(alloc->gpu == cookie); pl_buf_destroy(alloc->gpu, &alloc->buf); free(alloc); p->data[0] = p->data[1] = p->data[2] = p->allocator_data = NULL; } #undef PL_ALIGN2 #undef PL_MAGIC0 #undef 
PL_MAGIC1 #endif // LIBPLACEBO_DAV1D_H_ libplacebo-v4.192.1/src/include/libplacebo/utils/frame_queue.h000066400000000000000000000216321417677245700243170ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_FRAME_QUEUE_H #define LIBPLACEBO_FRAME_QUEUE_H #include PL_API_BEGIN // An abstraction layer for automatically turning a conceptual stream of // (frame, pts) pairs, as emitted by a decoder or filter graph, into a // `pl_frame_mix` suitable for `pl_render_image_mix`. // // This API ensures that minimal work is performed (e.g. only mapping frames // that are actually required), while also satisfying the requirements // of any configured frame mixer. // // Thread-safety: Safe typedef PL_STRUCT(pl_queue) *pl_queue; enum pl_queue_status { PL_QUEUE_OK, // success PL_QUEUE_EOF, // no more frames are available PL_QUEUE_MORE, // more frames needed, but not (yet) available PL_QUEUE_ERR = -1, // some unknown error occurred while retrieving frames // (Deprecated) Aliases for backwards compatibility QUEUE_OK PL_DEPRECATED = PL_QUEUE_OK, QUEUE_EOF PL_DEPRECATED = PL_QUEUE_EOF, QUEUE_MORE PL_DEPRECATED = PL_QUEUE_MORE, QUEUE_ERR PL_DEPRECATED = PL_QUEUE_ERR, }; struct pl_source_frame { // The frame's presentation timestamp, in seconds relative to the first // frame. These must be monotonically increasing for subsequent frames. // To implement a discontinuous jump, users must explicitly reset the // frame queue with `pl_queue_reset` and restart from PTS 0.0. float pts; // Abstract frame data itself. To allow mapping frames only when they're // actually needed, frames use a lazy representation. The provided // callbacks will be invoked to interface with it. void *frame_data; // This will be called to map the frame to the GPU, only if needed. // // `tex` is a pointer to an array of 4 texture objects (or NULL), which // *may* serve as backing storage for the texture being mapped. These are // intended to be recreated by `map`, e.g. using `pl_tex_recreate` or // `pl_upload_plane` as appropriate. They will be managed internally by // `pl_queue` and destroyed at some unspecified future point in time. // // Note: If `map` fails, it will not be retried, nor will `discard` be run. // The user should clean up state in this case. bool (*map)(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src, struct pl_frame *out_frame); // If present, this will be called on frames that are done being used by // `pl_queue`. This may be useful to e.g. unmap textures backed by external // APIs such as hardware decoders. (Optional) void (*unmap)(pl_gpu gpu, struct pl_frame *frame, const struct pl_source_frame *src); // This function will be called for frames that are deemed unnecessary // (e.g. never became visible) and should instead be cleanly freed. // (Optional) void (*discard)(const struct pl_source_frame *src); }; // Create a new, empty frame queue. 
// // It's highly recommended to fully render a single frame with `pts == 0.0`, // and flush the GPU pipeline with `pl_gpu_finish`, prior to starting the timed // playback loop. pl_queue pl_queue_create(pl_gpu gpu); void pl_queue_destroy(pl_queue *queue); // Explicitly clear the queue. This is essentially equivalent to destroying // and recreating the queue, but preserves any internal memory allocations. // // Note: Calling `pl_queue_reset` may block, if another thread is currently // blocked on a different `pl_queue_*` call. void pl_queue_reset(pl_queue queue); // Explicitly push a frame. This is an alternative way to feed the frame queue // with incoming frames, the other method being the asynchronous callback // specified as `pl_queue_params.get_frame`. Both methods may be used // simultaneously, although providing `get_frame` is recommended since it // avoids the risk of the queue underrunning. // // When no more frames are available, call this function with `frame == NULL` // to indicate EOF and begin draining the frame queue. void pl_queue_push(pl_queue queue, const struct pl_source_frame *frame); // Variant of `pl_queue_push` that blocks while the queue is judged // (internally) to be "too full". This is useful for asynchronous decoder loops // in order to prevent the queue from exhausting available RAM if frames are // decoded significantly faster than they're displayed. // // The given `timeout` parameter specifies how long to wait before giving up, // in nanoseconds. Returns false if this timeout was reached. bool pl_queue_push_block(pl_queue queue, uint64_t timeout, const struct pl_source_frame *frame); struct pl_queue_params { // The PTS of the frame that will be rendered. This should be set to the // timestamp (in seconds) of the next vsync, relative to the initial frame. // // These must be monotonically increasing. To implement a discontinuous // jump, users must explicitly reset the frame queue with `pl_queue_reset` // and restart from PTS 0.0. float pts; // The radius of the configured mixer. This should be set to the value // as returned by `pl_frame_mix_radius`. float radius; // The estimated duration of a vsync, in seconds. This will only be used as // a hint, the true value will be estimated by comparing `pts` timestamps // between calls to `pl_queue_update`. (Optional) float vsync_duration; // The estimated duration of a frame, in seconds. This will only be used as // an initial hint, the true value will be estimated by comparing `pts` // timestamps between source frames. (Optional) float frame_duration; // If the difference between the (estimated) vsync duration and the // (measured) frame duration is smaller than this threshold, silently // disable interpolation and switch to ZOH semantics instead. // // For example, a value of 0.01 allows the FPS to differ by up to 1% // without being interpolated. Note that this will result in a continuous // phase drift unless also compensated for by the user, which will // eventually resulted in a dropped or duplicated frame. (Though this can // be preferable to seeing that same phase drift result in a temporally // smeared image) float interpolation_threshold; // Specifies how long `pl_queue_update` will wait for frames to become // available, in nanoseconds, before giving up and returning with // QUEUE_MORE. // // If `get_frame` is provided, this value is ignored by `pl_queue` and // should instead be interpreted by the provided callback. uint64_t timeout; // This callback will be used to pull new frames from the decoder. 
It may // block if needed. The user is responsible for setting appropriate time // limits and/or returning and interpreting QUEUE_MORE as sensible. // // Providing this callback is entirely optional. Users can instead choose // to manually feed the frame queue with new frames using `pl_queue_push`. enum pl_queue_status (*get_frame)(struct pl_source_frame *out_frame, const struct pl_queue_params *params); void *priv; }; #define pl_queue_params(...) (&(struct pl_queue_params) { __VA_ARGS__ }) // Advance the frame queue's internal state to the target timestamp. Any frames // which are no longer needed (i.e. too far in the past) are automatically // unmapped and evicted. Any future frames which are needed to fill the queue // must either have been pushed in advance, or will be requested using the // provided `get_frame` callback. If you call this on `out_mix == NULL`, the // queue state will advance, but no frames will be mapped. // // This function may return with PL_QUEUE_MORE, in which case the user may wish // to ensure more frames are available and then re-run this function with the // same parameters. In this case, `out_mix` is still written to, but it may be // incomplete (or even contain no frames at all). // // The resulting mix of frames in `out_mix` will represent the neighbourhood of // the target timestamp, and can be passed to `pl_render_image_mix` as-is. // // Note: `out_mix` will only remain valid until the next call to // `pl_queue_update` or `pl_queue_reset`. enum pl_queue_status pl_queue_update(pl_queue queue, struct pl_frame_mix *out_mix, const struct pl_queue_params *params); PL_API_END #endif // LIBPLACEBO_FRAME_QUEUE_H libplacebo-v4.192.1/src/include/libplacebo/utils/libav.h000066400000000000000000000235371417677245700231240ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_LIBAV_H_ #define LIBPLACEBO_LIBAV_H_ #include #include PL_API_BEGIN #include #include #include #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 16, 100) # define PL_HAVE_LAV_DOLBY_VISION # include #endif // Fill in the details of a `pl_frame` from an AVFrame. This function will // explicitly clear `out_frame`, setting all extra fields to 0. After this // function returns, the only missing data is information related to the plane // texture itself (`planes[N].texture`), as well as any overlays (e.g. // subtitles). // // Note: If the AVFrame contains an embedded ICC profile or H.274 film grain // metadata, the resulting `out_image->profile` will reference this pointer, // meaning that in general, the `pl_frame` is only guaranteed to be valid as // long as the AVFrame is not freed. // // Note: This will ignore Dolby Vision metadata by default (to avoid leaking // memory), either switch to pl_map_avframe_ex or do it manually using // pl_map_dovi_metadata. 
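//
// For reference, a minimal (hedged) sketch of the recommended
// `pl_map_avframe_ex` flow; `gpu`, `avframe` and the `tex` array are assumed
// to already exist:
//
//     struct pl_frame image;
//     if (pl_map_avframe_ex(gpu, &image, pl_avframe_params(
//             .frame = avframe,
//             .tex   = tex,
//         )))
//     {
//         /* ... render `image` ... */
//         pl_unmap_avframe(gpu, &image);
//     }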
static void pl_frame_from_avframe(struct pl_frame *out_frame, const AVFrame *frame); // Deprecated aliases for backwards compatibility #define pl_image_from_avframe pl_frame_from_avframe #define pl_target_from_avframe pl_frame_from_avframe // Copy extra metadata from an AVStream to a pl_frame. This should be called // after `pl_frame_from_avframe` or `pl_upload_avframe` (respectively), and // sets metadata associated with stream-level side data. This is needed because // FFmpeg rather annoyingly does not propagate stream-level metadata to frames. static void pl_frame_copy_stream_props(struct pl_frame *out_frame, const AVStream *stream); #ifdef PL_HAVE_LAV_DOLBY_VISION // Helper function to map Dolby Vision metadata from the FFmpeg format. static void pl_map_dovi_metadata(struct pl_dovi_metadata *out, const AVDOVIMetadata *metadata); #endif // Helper function to test if a pixfmt would be supported by the GPU. // Essentially, this can be used to check if `pl_upload_avframe` would work for // a given AVPixelFormat, without actually uploading or allocating anything. static bool pl_test_pixfmt(pl_gpu gpu, enum AVPixelFormat pixfmt); // Like `pl_frame_from_avframe`, but the texture pointers are also initialized // to ensure they have the correct size and format to match the AVframe. // Similar in spirit to `pl_recreate_plane`, and the same notes apply. `tex` // must be an array of 4 pointers of type `pl_tex`, each either // pointing to a valid texture, or NULL. Returns whether successful. static bool pl_frame_recreate_from_avframe(pl_gpu gpu, struct pl_frame *out_frame, pl_tex tex[4], const AVFrame *frame); struct pl_avframe_params { // The AVFrame to map. Required. const AVFrame *frame; // Backing textures for frame data. Required for all non-hwdec formats. // This must point to an array of four valid textures (or NULL entries). // // Note: Not cleaned up by `pl_unmap_avframe`. The intent is for users to // re-use this texture array for subsequent frames, to avoid texture // creation/destruction overhead. pl_tex *tex; // Also map Dolby Vision metadata (if supported). Note that this also // overrides the colorimetry metadata (forces BT.2020+PQ). bool map_dovi; }; #define PL_AVFRAME_DEFAULTS \ .map_dovi = true, #define pl_avframe_params(...) (&(struct pl_avframe_params) { PL_AVFRAME_DEFAULTS __VA_ARGS__ }) // Very high level helper function to take an `AVFrame` and map it to the GPU. // The resulting `pl_frame` remains valid until `pl_unmap_avframe` is called, // which must be called at some point to clean up state. The `AVFrame` is // automatically ref'd and unref'd if needed. Returns whether successful. // // Note: `out_frame->user_data` will hold a reference to the AVFrame // corresponding to the `pl_frame`. It will automatically be unref'd by // `pl_unmap_avframe`. static bool pl_map_avframe_ex(pl_gpu gpu, struct pl_frame *out_frame, const struct pl_avframe_params *params); static void pl_unmap_avframe(pl_gpu gpu, struct pl_frame *frame); // Backwards compatibility with previous versions of this API. static inline bool pl_map_avframe(pl_gpu gpu, struct pl_frame *out_frame, pl_tex tex[4], const AVFrame *avframe) { return pl_map_avframe_ex(gpu, out_frame, &(struct pl_avframe_params) { .frame = avframe, .tex = tex, }); } // Deprecated variant of `pl_map_frame`, with the following differences: // - Does not support hardware-accelerated frames // - Does not require manual unmapping // - Does not touch `frame->user_data`. 
// - Does not automatically map dovi metadata // - `frame` must not be freed by the user before `frame` is done being used static PL_DEPRECATED bool pl_upload_avframe(pl_gpu gpu, struct pl_frame *out_frame, pl_tex tex[4], const AVFrame *frame); // Download the texture contents of a `pl_frame` back to a corresponding // AVFrame. Blocks until completion. // // Note: This function performs minimal verification, so incorrect usage will // likely result in broken frames. Use `pl_frame_recreate_from_avframe` to // ensure matching formats. static bool pl_download_avframe(pl_gpu gpu, const struct pl_frame *frame, AVFrame *out_frame); // Helper functions to update the colorimetry data in an AVFrame based on // the values specified in the given color space / color repr / profile. // // Note: These functions can and will allocate AVFrame side data if needed, // in particular to encode `space.sig_peak` etc. static void pl_avframe_set_color(AVFrame *frame, struct pl_color_space space); static void pl_avframe_set_repr(AVFrame *frame, struct pl_color_repr repr); static void pl_avframe_set_profile(AVFrame *frame, struct pl_icc_profile profile); // Map an AVPixelFormat to an array of pl_plane_data structs. The array must // have at least `av_pix_fmt_count_planes(fmt)` elements, but never more than // 4. This function leaves `width`, `height` and `row_stride`, as well as the // data pointers, uninitialized. // // If `bits` is non-NULL, this function will attempt aligning the resulting // `pl_plane_data` struct for optimal compatibility, placing the resulting // `pl_bit_depth` metadata into `bits`. // // Returns the number of plane structs written to, or 0 on error. // // Note: This function is usually clumsier to use than the higher-level // functions above, but it might have some fringe use cases, for example if // the user wants to replace the data buffers by `pl_buf` references in the // `pl_plane_data` before uploading it to the GPU. static int pl_plane_data_from_pixfmt(struct pl_plane_data data[4], struct pl_bit_encoding *bits, enum AVPixelFormat pix_fmt); // Callback for AVCodecContext.get_buffer2 that allocates memory from // persistently mapped buffers. This can be more efficient than regular // system memory, especially on platforms that don't support importing // PL_HANDLE_HOST_PTR as buffers. // // Note: `avctx->opaque` must be a pointer that *points* to the GPU instance. // That is, it should have type `pl_gpu *`. static int pl_get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flags); // Mapping functions for the various libavutil enums. Note that these are not // quite 1:1, and even for values that exist in both, the semantics sometimes // differ. Some special cases (e.g. ICtCp, or XYZ) are handled differently in // libplacebo and libavutil, respectively. // // Because of this, it's generally recommended to avoid these and instead use // helpers like `pl_frame_from_avframe`, which contain extra logic to patch // through all of the special cases. 
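//
// Nonetheless, as a minimal sketch (assuming `avframe` already exists), the
// low-level mappings can be used to tag a representation by hand:
//
//     struct pl_color_repr repr = {
//         .sys    = pl_system_from_av(avframe->colorspace),
//         .levels = pl_levels_from_av(avframe->color_range),
//     };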
static enum pl_color_system pl_system_from_av(enum AVColorSpace spc); static enum AVColorSpace pl_system_to_av(enum pl_color_system sys); static enum pl_color_levels pl_levels_from_av(enum AVColorRange range); static enum AVColorRange pl_levels_to_av(enum pl_color_levels levels); static enum pl_color_primaries pl_primaries_from_av(enum AVColorPrimaries prim); static enum AVColorPrimaries pl_primaries_to_av(enum pl_color_primaries prim); static enum pl_color_transfer pl_transfer_from_av(enum AVColorTransferCharacteristic trc); static enum AVColorTransferCharacteristic pl_transfer_to_av(enum pl_color_transfer trc); static enum pl_chroma_location pl_chroma_from_av(enum AVChromaLocation loc); static enum AVChromaLocation pl_chroma_to_av(enum pl_chroma_location loc); // Helper function to generate a `pl_color_space` struct from an AVFrame. static void pl_color_space_from_avframe(struct pl_color_space *out_csp, const AVFrame *frame); // Deprecated alias for backwards compatibility #define pl_swapchain_colors_from_avframe pl_color_space_from_avframe // Actual implementation, included as part of this header to avoid having // a compile-time dependency on libavutil. #include PL_API_END #endif // LIBPLACEBO_LIBAV_H_ libplacebo-v4.192.1/src/include/libplacebo/utils/libav_internal.h000066400000000000000000001354131417677245700250150ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_LIBAV_H_ # error This header should be included as part of #else #include #include #include #include #include #include #include #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 61, 100) # define HAVE_LAV_FILM_GRAIN # include #endif #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 8, 100) && defined(PL_HAVE_VULKAN) # define HAVE_LAV_VULKAN # include # include #endif static inline enum pl_color_system pl_system_from_av(enum AVColorSpace spc) { switch (spc) { case AVCOL_SPC_RGB: return PL_COLOR_SYSTEM_RGB; case AVCOL_SPC_BT709: return PL_COLOR_SYSTEM_BT_709; case AVCOL_SPC_UNSPECIFIED: return PL_COLOR_SYSTEM_UNKNOWN; case AVCOL_SPC_RESERVED: return PL_COLOR_SYSTEM_UNKNOWN; case AVCOL_SPC_FCC: return PL_COLOR_SYSTEM_UNKNOWN; // missing case AVCOL_SPC_BT470BG: return PL_COLOR_SYSTEM_BT_601; case AVCOL_SPC_SMPTE170M: return PL_COLOR_SYSTEM_BT_601; case AVCOL_SPC_SMPTE240M: return PL_COLOR_SYSTEM_SMPTE_240M; case AVCOL_SPC_YCGCO: return PL_COLOR_SYSTEM_YCGCO; case AVCOL_SPC_BT2020_NCL: return PL_COLOR_SYSTEM_BT_2020_NC; case AVCOL_SPC_BT2020_CL: return PL_COLOR_SYSTEM_BT_2020_C; case AVCOL_SPC_SMPTE2085: return PL_COLOR_SYSTEM_UNKNOWN; // missing case AVCOL_SPC_CHROMA_DERIVED_NCL: return PL_COLOR_SYSTEM_UNKNOWN; // missing case AVCOL_SPC_CHROMA_DERIVED_CL: return PL_COLOR_SYSTEM_UNKNOWN; // missing // Note: this colorspace is confused between PQ and HLG, which libav* // requires inferring from other sources, but libplacebo makes explicit. 
// Default to PQ as it's the more common scenario. case AVCOL_SPC_ICTCP: return PL_COLOR_SYSTEM_BT_2100_PQ; case AVCOL_SPC_NB: return PL_COLOR_SYSTEM_COUNT; } return PL_COLOR_SYSTEM_UNKNOWN; } static inline enum AVColorSpace pl_system_to_av(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: return AVCOL_SPC_UNSPECIFIED; case PL_COLOR_SYSTEM_BT_601: return AVCOL_SPC_SMPTE170M; case PL_COLOR_SYSTEM_BT_709: return AVCOL_SPC_BT709; case PL_COLOR_SYSTEM_SMPTE_240M: return AVCOL_SPC_SMPTE240M; case PL_COLOR_SYSTEM_BT_2020_NC: return AVCOL_SPC_BT2020_NCL; case PL_COLOR_SYSTEM_BT_2020_C: return AVCOL_SPC_BT2020_CL; case PL_COLOR_SYSTEM_BT_2100_PQ: return AVCOL_SPC_ICTCP; case PL_COLOR_SYSTEM_BT_2100_HLG: return AVCOL_SPC_ICTCP; case PL_COLOR_SYSTEM_DOLBYVISION: return AVCOL_SPC_UNSPECIFIED; // missing case PL_COLOR_SYSTEM_YCGCO: return AVCOL_SPC_YCGCO; case PL_COLOR_SYSTEM_RGB: return AVCOL_SPC_RGB; case PL_COLOR_SYSTEM_XYZ: return AVCOL_SPC_UNSPECIFIED; // handled differently case PL_COLOR_SYSTEM_COUNT: return AVCOL_SPC_NB; } return AVCOL_SPC_UNSPECIFIED; } static inline enum pl_color_levels pl_levels_from_av(enum AVColorRange range) { switch (range) { case AVCOL_RANGE_UNSPECIFIED: return PL_COLOR_LEVELS_UNKNOWN; case AVCOL_RANGE_MPEG: return PL_COLOR_LEVELS_LIMITED; case AVCOL_RANGE_JPEG: return PL_COLOR_LEVELS_FULL; case AVCOL_RANGE_NB: return PL_COLOR_LEVELS_COUNT; } return PL_COLOR_LEVELS_UNKNOWN; } static inline enum AVColorRange pl_levels_to_av(enum pl_color_levels levels) { switch (levels) { case PL_COLOR_LEVELS_UNKNOWN: return AVCOL_RANGE_UNSPECIFIED; case PL_COLOR_LEVELS_LIMITED: return AVCOL_RANGE_MPEG; case PL_COLOR_LEVELS_FULL: return AVCOL_RANGE_JPEG; case PL_COLOR_LEVELS_COUNT: return AVCOL_RANGE_NB; } return AVCOL_RANGE_UNSPECIFIED; } static inline enum pl_color_primaries pl_primaries_from_av(enum AVColorPrimaries prim) { switch (prim) { case AVCOL_PRI_RESERVED0: return PL_COLOR_PRIM_UNKNOWN; case AVCOL_PRI_BT709: return PL_COLOR_PRIM_BT_709; case AVCOL_PRI_UNSPECIFIED: return PL_COLOR_PRIM_UNKNOWN; case AVCOL_PRI_RESERVED: return PL_COLOR_PRIM_UNKNOWN; case AVCOL_PRI_BT470M: return PL_COLOR_PRIM_BT_470M; case AVCOL_PRI_BT470BG: return PL_COLOR_PRIM_BT_601_625; case AVCOL_PRI_SMPTE170M: return PL_COLOR_PRIM_BT_601_525; case AVCOL_PRI_SMPTE240M: return PL_COLOR_PRIM_BT_601_525; case AVCOL_PRI_FILM: return PL_COLOR_PRIM_FILM_C; case AVCOL_PRI_BT2020: return PL_COLOR_PRIM_BT_2020; case AVCOL_PRI_SMPTE428: return PL_COLOR_PRIM_CIE_1931; case AVCOL_PRI_SMPTE431: return PL_COLOR_PRIM_DCI_P3; case AVCOL_PRI_SMPTE432: return PL_COLOR_PRIM_DISPLAY_P3; case AVCOL_PRI_JEDEC_P22: return PL_COLOR_PRIM_EBU_3213; case AVCOL_PRI_NB: return PL_COLOR_PRIM_COUNT; } return PL_COLOR_PRIM_UNKNOWN; } static inline enum AVColorPrimaries pl_primaries_to_av(enum pl_color_primaries prim) { switch (prim) { case PL_COLOR_PRIM_UNKNOWN: return AVCOL_PRI_UNSPECIFIED; case PL_COLOR_PRIM_BT_601_525: return AVCOL_PRI_SMPTE170M; case PL_COLOR_PRIM_BT_601_625: return AVCOL_PRI_BT470BG; case PL_COLOR_PRIM_BT_709: return AVCOL_PRI_BT709; case PL_COLOR_PRIM_BT_470M: return AVCOL_PRI_BT470M; case PL_COLOR_PRIM_EBU_3213: return AVCOL_PRI_JEDEC_P22; case PL_COLOR_PRIM_BT_2020: return AVCOL_PRI_BT2020; case PL_COLOR_PRIM_APPLE: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_ADOBE: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_PRO_PHOTO: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_CIE_1931: return AVCOL_PRI_SMPTE428; case PL_COLOR_PRIM_DCI_P3: return 
AVCOL_PRI_SMPTE431; case PL_COLOR_PRIM_DISPLAY_P3: return AVCOL_PRI_SMPTE432; case PL_COLOR_PRIM_V_GAMUT: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_S_GAMUT: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_FILM_C: return AVCOL_PRI_FILM; case PL_COLOR_PRIM_COUNT: return AVCOL_PRI_NB; } return AVCOL_PRI_UNSPECIFIED; } static inline enum pl_color_transfer pl_transfer_from_av(enum AVColorTransferCharacteristic trc) { switch (trc) { case AVCOL_TRC_RESERVED0: return PL_COLOR_TRC_UNKNOWN; case AVCOL_TRC_BT709: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_UNSPECIFIED: return PL_COLOR_TRC_UNKNOWN; case AVCOL_TRC_RESERVED: return PL_COLOR_TRC_UNKNOWN; case AVCOL_TRC_GAMMA22: return PL_COLOR_TRC_GAMMA22; case AVCOL_TRC_GAMMA28: return PL_COLOR_TRC_GAMMA28; case AVCOL_TRC_SMPTE170M: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_SMPTE240M: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_LINEAR: return PL_COLOR_TRC_LINEAR; case AVCOL_TRC_LOG: return PL_COLOR_TRC_UNKNOWN; // missing case AVCOL_TRC_LOG_SQRT: return PL_COLOR_TRC_UNKNOWN; // missing case AVCOL_TRC_IEC61966_2_4: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_BT1361_ECG: return PL_COLOR_TRC_BT_1886; // ETOF != OETF case AVCOL_TRC_IEC61966_2_1: return PL_COLOR_TRC_SRGB; case AVCOL_TRC_BT2020_10: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_BT2020_12: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_SMPTE2084: return PL_COLOR_TRC_PQ; case AVCOL_TRC_SMPTE428: return PL_COLOR_TRC_UNKNOWN; // missing case AVCOL_TRC_ARIB_STD_B67: return PL_COLOR_TRC_HLG; case AVCOL_TRC_NB: return PL_COLOR_TRC_COUNT; } return PL_COLOR_TRC_UNKNOWN; } static inline enum AVColorTransferCharacteristic pl_transfer_to_av(enum pl_color_transfer trc) { switch (trc) { case PL_COLOR_TRC_UNKNOWN: return AVCOL_TRC_UNSPECIFIED; case PL_COLOR_TRC_BT_1886: return AVCOL_TRC_BT709; // EOTF != OETF case PL_COLOR_TRC_SRGB: return AVCOL_TRC_IEC61966_2_1; case PL_COLOR_TRC_LINEAR: return AVCOL_TRC_LINEAR; case PL_COLOR_TRC_GAMMA18: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_GAMMA20: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_GAMMA22: return AVCOL_TRC_GAMMA22; case PL_COLOR_TRC_GAMMA24: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_GAMMA26: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_GAMMA28: return AVCOL_TRC_GAMMA28; case PL_COLOR_TRC_PRO_PHOTO: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_PQ: return AVCOL_TRC_SMPTE2084; case PL_COLOR_TRC_HLG: return AVCOL_TRC_ARIB_STD_B67; case PL_COLOR_TRC_V_LOG: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_S_LOG1: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_S_LOG2: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_COUNT: return AVCOL_TRC_NB; } return AVCOL_TRC_UNSPECIFIED; } static inline enum pl_chroma_location pl_chroma_from_av(enum AVChromaLocation loc) { switch (loc) { case AVCHROMA_LOC_UNSPECIFIED: return PL_CHROMA_UNKNOWN; case AVCHROMA_LOC_LEFT: return PL_CHROMA_LEFT; case AVCHROMA_LOC_CENTER: return PL_CHROMA_CENTER; case AVCHROMA_LOC_TOPLEFT: return PL_CHROMA_TOP_LEFT; case AVCHROMA_LOC_TOP: return PL_CHROMA_TOP_CENTER; case AVCHROMA_LOC_BOTTOMLEFT: return PL_CHROMA_BOTTOM_LEFT; case AVCHROMA_LOC_BOTTOM: return PL_CHROMA_BOTTOM_CENTER; case AVCHROMA_LOC_NB: return PL_CHROMA_COUNT; } return PL_CHROMA_UNKNOWN; } static inline enum AVChromaLocation pl_chroma_to_av(enum pl_chroma_location loc) { switch (loc) { case 
PL_CHROMA_UNKNOWN: return AVCHROMA_LOC_UNSPECIFIED; case PL_CHROMA_LEFT: return AVCHROMA_LOC_LEFT; case PL_CHROMA_CENTER: return AVCHROMA_LOC_CENTER; case PL_CHROMA_TOP_LEFT: return AVCHROMA_LOC_TOPLEFT; case PL_CHROMA_TOP_CENTER: return AVCHROMA_LOC_TOP; case PL_CHROMA_BOTTOM_LEFT: return AVCHROMA_LOC_BOTTOMLEFT; case PL_CHROMA_BOTTOM_CENTER: return AVCHROMA_LOC_BOTTOM; case PL_CHROMA_COUNT: return AVCHROMA_LOC_NB; } return AVCHROMA_LOC_UNSPECIFIED; } static void pl_color_space_from_avframe(struct pl_color_space *out_csp, const AVFrame *frame) { const AVFrameSideData *sd; bool is_hdr, is_wide; *out_csp = (struct pl_color_space) { .primaries = pl_primaries_from_av(frame->color_primaries), .transfer = pl_transfer_from_av(frame->color_trc), }; // Ignore mastering metadata for non-HDR/wide gamut content is_hdr = pl_color_transfer_is_hdr(out_csp->transfer); is_wide = pl_color_primaries_is_wide_gamut(out_csp->primaries); if (is_hdr && (sd = av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL))) { const AVContentLightMetadata *clm = (AVContentLightMetadata *) sd->data; out_csp->hdr.max_cll = clm->MaxCLL; out_csp->hdr.max_fall = clm->MaxFALL; } if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA))) { const AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *) sd->data; if (is_hdr && mdm->has_luminance) { out_csp->hdr.max_luma = av_q2d(mdm->max_luminance); out_csp->hdr.min_luma = av_q2d(mdm->min_luminance); } if (is_wide && mdm->has_primaries) { out_csp->hdr.prim = (struct pl_raw_primaries) { .red.x = av_q2d(mdm->display_primaries[0][0]), .red.y = av_q2d(mdm->display_primaries[0][1]), .green.x = av_q2d(mdm->display_primaries[1][0]), .green.y = av_q2d(mdm->display_primaries[1][1]), .blue.x = av_q2d(mdm->display_primaries[2][0]), .blue.y = av_q2d(mdm->display_primaries[2][1]), .white.x = av_q2d(mdm->white_point[0]), .white.y = av_q2d(mdm->white_point[1]), }; } } } static inline int pl_plane_data_num_comps(const struct pl_plane_data *data) { for (int i = 0; i < 4; i++) { if (data->component_size[i] == 0) return i; } return 4; } static inline int pl_plane_data_from_pixfmt(struct pl_plane_data out_data[4], struct pl_bit_encoding *out_bits, enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); int planes = av_pix_fmt_count_planes(pix_fmt); struct pl_plane_data aligned_data[4]; struct pl_bit_encoding bits; bool first; if (!desc || planes < 0) // e.g. AV_PIX_FMT_NONE return 0; if (desc->flags & AV_PIX_FMT_FLAG_BE) { // Big endian formats are almost definitely not supported in any // reasonable manner, erroring as a safety precaution return 0; } if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) { // Bitstream formats will most likely never be supported return 0; } if (desc->flags & AV_PIX_FMT_FLAG_PAL) { // Palette formats are (currently) not supported return 0; } if (desc->flags & AV_PIX_FMT_FLAG_BAYER) { // Bayer formats don't have valid `desc->offset` values, so we can't // use `pl_plane_data_from_mask` on them. return 0; } if (desc->nb_components == 0 || desc->nb_components > 4) { // Bogus components, possibly fake/virtual/hwaccel format? return 0; } if (planes > 4) return 0; // This shouldn't ever happen // Fill in the details for each plane for (int p = 0; p < planes; p++) { struct pl_plane_data *data = &out_data[p]; uint64_t masks[4] = {0}; data->type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ?
PL_FMT_FLOAT : PL_FMT_UNORM; data->pixel_stride = 0; for (int c = 0; c < desc->nb_components; c++) { const AVComponentDescriptor *comp = &desc->comp[c]; if (comp->plane != p) continue; masks[c] = (1LLU << comp->depth) - 1; // e.g. 0xFF for depth=8 masks[c] <<= comp->shift; masks[c] <<= comp->offset * 8; if (data->pixel_stride && (int) data->pixel_stride != comp->step) { // Pixel format contains components with different pixel stride // (e.g. packed YUYV), this is currently not supported return 0; } data->pixel_stride = comp->step; } pl_plane_data_from_mask(data, masks); } if (!out_bits) return planes; // Attempt aligning all of the planes for optimum compatibility first = true; for (int p = 0; p < planes; p++) { aligned_data[p] = out_data[p]; // Planes with only an alpha component should be ignored if (pl_plane_data_num_comps(&aligned_data[p]) == 1 && aligned_data[p].component_map[0] == PL_CHANNEL_A) { continue; } if (!pl_plane_data_align(&aligned_data[p], &bits)) goto misaligned; if (first) { *out_bits = bits; first = false; } else { if (!pl_bit_encoding_equal(&bits, out_bits)) goto misaligned; } } // Overwrite the planes by their aligned versions for (int p = 0; p < planes; p++) out_data[p] = aligned_data[p]; return planes; misaligned: *out_bits = (struct pl_bit_encoding) {0}; return planes; } static inline bool pl_test_pixfmt(pl_gpu gpu, enum AVPixelFormat pixfmt) { struct pl_bit_encoding bits; struct pl_plane_data data[4]; int planes; switch (pixfmt) { case AV_PIX_FMT_DRM_PRIME: case AV_PIX_FMT_VAAPI: return gpu->import_caps.tex & PL_HANDLE_DMA_BUF; #ifdef HAVE_LAV_VULKAN case AV_PIX_FMT_VULKAN: return pl_vulkan_get(gpu); #endif default: break; } planes = pl_plane_data_from_pixfmt(data, &bits, pixfmt); if (!planes) return false; for (int i = 0; i < planes; i++) { data[i].row_stride = 0; if (!pl_plane_find_fmt(gpu, NULL, &data[i])) return false; } return true; } static inline void pl_avframe_set_color(AVFrame *frame, struct pl_color_space csp) { const AVFrameSideData *sd; frame->color_primaries = pl_primaries_to_av(csp.primaries); frame->color_trc = pl_transfer_to_av(csp.transfer); if (csp.hdr.max_cll) { sd = av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); if (!sd) { sd = av_frame_new_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, sizeof(AVContentLightMetadata)); } if (sd) { AVContentLightMetadata *clm = (AVContentLightMetadata *) sd->data; *clm = (AVContentLightMetadata) { .MaxCLL = csp.hdr.max_cll, .MaxFALL = csp.hdr.max_fall, }; } } if (csp.hdr.max_luma || csp.hdr.prim.red.x) { sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); if (!sd) { sd = av_frame_new_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, sizeof(AVMasteringDisplayMetadata)); } if (sd) { AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *) sd->data; *mdm = (AVMasteringDisplayMetadata) { .max_luminance = av_d2q(csp.hdr.max_luma, 1000000), .min_luminance = av_d2q(csp.hdr.min_luma, 1000000), .has_luminance = !!csp.hdr.max_luma, .display_primaries = { { av_d2q(csp.hdr.prim.red.x, 1000000), av_d2q(csp.hdr.prim.red.y, 1000000), }, { av_d2q(csp.hdr.prim.green.x, 1000000), av_d2q(csp.hdr.prim.green.y, 1000000), }, { av_d2q(csp.hdr.prim.blue.x, 1000000), av_d2q(csp.hdr.prim.blue.y, 1000000), } }, .white_point = { av_d2q(csp.hdr.prim.white.x, 1000000), av_d2q(csp.hdr.prim.white.y, 1000000), }, .has_primaries = !!csp.hdr.prim.red.x, }; } } } static inline void pl_avframe_set_repr(AVFrame *frame, struct pl_color_repr repr) { frame->colorspace = 
pl_system_to_av(repr.sys); frame->color_range = pl_levels_to_av(repr.levels); // No real way to map repr.bits, the image format already has to match } static inline void pl_avframe_set_profile(AVFrame *frame, struct pl_icc_profile profile) { const AVFrameSideData *sd; av_frame_remove_side_data(frame, AV_FRAME_DATA_ICC_PROFILE); if (!profile.len) return; sd = av_frame_new_side_data(frame, AV_FRAME_DATA_ICC_PROFILE, profile.len); memcpy(sd->data, profile.data, profile.len); } static inline void pl_frame_from_avframe(struct pl_frame *out, const AVFrame *frame) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); int planes = av_pix_fmt_count_planes(frame->format); const AVFrameSideData *sd; assert(desc); if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) { const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; desc = av_pix_fmt_desc_get(hwfc->sw_format); planes = av_pix_fmt_count_planes(hwfc->sw_format); } // This should never fail, and there's nothing really useful we can do in // this failure case anyway, since this is a `void` function. assert(planes <= 4); *out = (struct pl_frame) { .num_planes = planes, .crop = { .x0 = frame->crop_left, .y0 = frame->crop_top, .x1 = frame->width - frame->crop_right, .y1 = frame->height - frame->crop_bottom, }, .repr = { .sys = pl_system_from_av(frame->colorspace), .levels = pl_levels_from_av(frame->color_range), .alpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) ? PL_ALPHA_INDEPENDENT : PL_ALPHA_UNKNOWN, // For sake of simplicity, just use the first component's depth as // the authoritative color depth for the whole image. Usually, this // will be overwritten by more specific information when using e.g. // `pl_map_avframe`, but for the sake of e.g. users wishing to map // hwaccel frames manually, this is a good default. .bits.color_depth = desc->comp[0].depth, }, }; pl_color_space_from_avframe(&out->color, frame); if (frame->colorspace == AVCOL_SPC_ICTCP && frame->color_trc == AVCOL_TRC_ARIB_STD_B67) { // libav* makes no distinction between PQ and HLG ICtCp, so we need // to manually fix it in the case that we have HLG ICtCp data. out->repr.sys = PL_COLOR_SYSTEM_BT_2100_HLG; } else if (strncmp(desc->name, "xyz", 3) == 0) { // libav* handles this as a special case, but doesn't provide an // explicit flag for it either, so we have to resort to this ugly // hack... out->repr.sys= PL_COLOR_SYSTEM_XYZ; } else if (desc->flags & AV_PIX_FMT_FLAG_RGB) { out->repr.sys = PL_COLOR_SYSTEM_RGB; out->repr.levels = PL_COLOR_LEVELS_FULL; // libav* ignores levels for RGB } else if (!out->repr.sys) { // libav* likes leaving this as UNKNOWN for YCbCr frames, which // confuses libplacebo since we infer UNKNOWN as RGB. To get around // this, explicitly infer a suitable colorspace for non-RGB formats. 
out->repr.sys = pl_color_system_guess_ycbcr(frame->width, frame->height); } if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ICC_PROFILE))) { out->profile = (struct pl_icc_profile) { .data = sd->data, .len = sd->size, }; // Needed to ensure profile uniqueness pl_icc_profile_compute_signature(&out->profile); } if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX))) { double rot = av_display_rotation_get((const int32_t *) sd->data); out->rotation = pl_rotation_normalize(4.5 - rot / 90.0); } #ifdef HAVE_LAV_FILM_GRAIN if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_FILM_GRAIN_PARAMS))) { const AVFilmGrainParams *fgp = (AVFilmGrainParams *) sd->data; out->film_grain.seed = fgp->seed; switch (fgp->type) { case AV_FILM_GRAIN_PARAMS_NONE: break; case AV_FILM_GRAIN_PARAMS_AV1: { const AVFilmGrainAOMParams *src = &fgp->codec.aom; struct pl_av1_grain_data *dst = &out->film_grain.params.av1; out->film_grain.type = PL_FILM_GRAIN_AV1; *dst = (struct pl_av1_grain_data) { .num_points_y = src->num_y_points, .chroma_scaling_from_luma = src->chroma_scaling_from_luma, .num_points_uv = { src->num_uv_points[0], src->num_uv_points[1] }, .scaling_shift = src->scaling_shift, .ar_coeff_lag = src->ar_coeff_lag, .ar_coeff_shift = src->ar_coeff_shift, .grain_scale_shift = src->grain_scale_shift, .uv_mult = { src->uv_mult[0], src->uv_mult[1] }, .uv_mult_luma = { src->uv_mult_luma[0], src->uv_mult_luma[1] }, .uv_offset = { src->uv_offset[0], src->uv_offset[1] }, .overlap = src->overlap_flag, }; assert(sizeof(dst->ar_coeffs_uv) == sizeof(src->ar_coeffs_uv)); memcpy(dst->points_y, src->y_points, sizeof(dst->points_y)); memcpy(dst->points_uv, src->uv_points, sizeof(dst->points_uv)); memcpy(dst->ar_coeffs_y, src->ar_coeffs_y, sizeof(dst->ar_coeffs_y)); memcpy(dst->ar_coeffs_uv, src->ar_coeffs_uv, sizeof(dst->ar_coeffs_uv)); break; } #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 2, 100) case AV_FILM_GRAIN_PARAMS_H274: { const AVFilmGrainH274Params *src = &fgp->codec.h274; struct pl_h274_grain_data *dst = &out->film_grain.params.h274; out->film_grain.type = PL_FILM_GRAIN_H274; *dst = (struct pl_h274_grain_data) { .model_id = src->model_id, .blending_mode_id = src->blending_mode_id, .log2_scale_factor = src->log2_scale_factor, .component_model_present = { src->component_model_present[0], src->component_model_present[1], src->component_model_present[2], }, .intensity_interval_lower_bound = { src->intensity_interval_lower_bound[0], src->intensity_interval_lower_bound[1], src->intensity_interval_lower_bound[2], }, .intensity_interval_upper_bound = { src->intensity_interval_upper_bound[0], src->intensity_interval_upper_bound[1], src->intensity_interval_upper_bound[2], }, .comp_model_value = { src->comp_model_value[0], src->comp_model_value[1], src->comp_model_value[2], }, }; memcpy(dst->num_intensity_intervals, src->num_intensity_intervals, sizeof(dst->num_intensity_intervals)); memcpy(dst->num_model_values, src->num_model_values, sizeof(dst->num_model_values)); break; } #endif } } #endif // HAVE_LAV_FILM_GRAIN for (int p = 0; p < out->num_planes; p++) { struct pl_plane *plane = &out->planes[p]; // Fill in the component mapping array for (int c = 0; c < desc->nb_components; c++) { if (desc->comp[c].plane == p) plane->component_mapping[plane->components++] = c; } // Clear the superfluous components for (int c = plane->components; c < 4; c++) plane->component_mapping[c] = PL_CHANNEL_NONE; } // Only set the chroma location for definitely subsampled images, makes no // sense otherwise if 
(desc->log2_chroma_w || desc->log2_chroma_h) { enum pl_chroma_location loc = pl_chroma_from_av(frame->chroma_location); pl_frame_set_chroma_location(out, loc); } } static inline void pl_frame_copy_stream_props(struct pl_frame *out, const AVStream *stream) { const uint8_t *sd; if ((sd = av_stream_get_side_data(stream, AV_PKT_DATA_CONTENT_LIGHT_LEVEL, NULL))) { const AVContentLightMetadata *clm = (AVContentLightMetadata *) sd; out->color.hdr.max_cll = clm->MaxCLL; out->color.hdr.max_fall = clm->MaxFALL; } if ((sd = av_stream_get_side_data(stream, AV_PKT_DATA_MASTERING_DISPLAY_METADATA, NULL))) { const AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *) sd; if (mdm->has_luminance) { out->color.hdr.max_luma = av_q2d(mdm->max_luminance); out->color.hdr.min_luma = av_q2d(mdm->min_luminance); } if (mdm->has_primaries) { out->color.hdr.prim = (struct pl_raw_primaries) { .red.x = av_q2d(mdm->display_primaries[0][0]), .red.y = av_q2d(mdm->display_primaries[0][1]), .green.x = av_q2d(mdm->display_primaries[1][0]), .green.y = av_q2d(mdm->display_primaries[1][1]), .blue.x = av_q2d(mdm->display_primaries[2][0]), .blue.y = av_q2d(mdm->display_primaries[2][1]), .white.x = av_q2d(mdm->white_point[0]), .white.y = av_q2d(mdm->white_point[1]), }; } } if ((sd = av_stream_get_side_data(stream, AV_PKT_DATA_DISPLAYMATRIX, NULL))) { double rot = av_display_rotation_get((const int32_t *) sd); out->rotation = pl_rotation_normalize(4.5 - rot / 90.0); } } #ifdef PL_HAVE_LAV_DOLBY_VISION static inline void pl_map_dovi_metadata(struct pl_dovi_metadata *out, const AVDOVIMetadata *data) { const AVDOVIRpuDataHeader *header; const AVDOVIDataMapping *mapping; const AVDOVIColorMetadata *color; if (!data) return; header = av_dovi_get_header(data); mapping = av_dovi_get_mapping(data); color = av_dovi_get_color(data); for (int i = 0; i < 3; i++) out->nonlinear_offset[i] = av_q2d(color->ycc_to_rgb_offset[i]); for (int i = 0; i < 9; i++) { float *nonlinear = &out->nonlinear.m[0][0]; float *linear = &out->linear.m[0][0]; nonlinear[i] = av_q2d(color->ycc_to_rgb_matrix[i]); linear[i] = av_q2d(color->rgb_to_lms_matrix[i]); } for (int c = 0; c < 3; c++) { const AVDOVIReshapingCurve *csrc = &mapping->curves[c]; struct pl_reshape_data *cdst = &out->comp[c]; cdst->num_pivots = csrc->num_pivots; for (int i = 0; i < csrc->num_pivots; i++) { const float scale = 1.0f / ((1 << header->bl_bit_depth) - 1); cdst->pivots[i] = scale * csrc->pivots[i]; } for (int i = 0; i < csrc->num_pivots - 1; i++) { const float scale = 1.0f / (1 << header->coef_log2_denom); cdst->method[i] = csrc->mapping_idc[i]; switch (csrc->mapping_idc[i]) { case AV_DOVI_MAPPING_POLYNOMIAL: for (int k = 0; k < 3; k++) { cdst->poly_coeffs[i][k] = (k <= csrc->poly_order[i]) ?
scale * csrc->poly_coef[i][k] : 0.0f; } break; case AV_DOVI_MAPPING_MMR: cdst->mmr_order[i] = csrc->mmr_order[i]; cdst->mmr_constant[i] = scale * csrc->mmr_constant[i]; for (int j = 0; j < csrc->mmr_order[i]; j++) { for (int k = 0; k < 7; k++) cdst->mmr_coeffs[i][j][k] = scale * csrc->mmr_coef[i][j][k]; } break; } } } } #endif // PL_HAVE_LAV_DOLBY_VISION static inline bool pl_frame_recreate_from_avframe(pl_gpu gpu, struct pl_frame *out, pl_tex tex[4], const AVFrame *frame) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); struct pl_plane_data data[4] = {0}; int planes; pl_frame_from_avframe(out, frame); planes = pl_plane_data_from_pixfmt(data, &out->repr.bits, frame->format); if (!planes) return false; for (int p = 0; p < planes; p++) { bool is_chroma = p == 1 || p == 2; // matches lavu logic data[p].width = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0); data[p].height = AV_CEIL_RSHIFT(frame->height, is_chroma ? desc->log2_chroma_h : 0); if (!pl_recreate_plane(gpu, &out->planes[p], &tex[p], &data[p])) return false; } return true; } static void pl_avframe_free_cb(void *priv) { AVFrame *frame = priv; av_frame_free(&frame); } #define PL_MAGIC0 0xfb5b3b8b #define PL_MAGIC1 0xee659f6d struct pl_avalloc { uint32_t magic[2]; pl_gpu gpu; pl_buf buf; }; static void pl_fix_hwframe_sample_depth(struct pl_frame *out, const AVFrame *frame) { const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; pl_fmt fmt = out->planes[0].texture->params.format; struct pl_bit_encoding *bits = &out->repr.bits; bits->sample_depth = fmt->component_depth[0]; switch (hwfc->sw_format) { case AV_PIX_FMT_P010: bits->bit_shift = 6; break; default: break; } } static bool pl_map_avframe_drm(pl_gpu gpu, struct pl_frame *out, const AVFrame *frame) { const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); const AVDRMFrameDescriptor *drm = (AVDRMFrameDescriptor *) frame->data[0]; assert(frame->format == AV_PIX_FMT_DRM_PRIME); if (!(gpu->import_caps.tex & PL_HANDLE_DMA_BUF)) return false; assert(drm->nb_layers >= out->num_planes); for (int n = 0; n < out->num_planes; n++) { const AVDRMLayerDescriptor *layer = &drm->layers[n]; const AVDRMPlaneDescriptor *plane = &layer->planes[0]; const AVDRMObjectDescriptor *object = &drm->objects[plane->object_index]; pl_fmt fmt = pl_find_fourcc(gpu, layer->format); bool is_chroma = n == 1 || n == 2; if (!fmt || !pl_fmt_has_modifier(fmt, object->format_modifier)) return false; assert(layer->nb_planes == 1); // we only support planar formats out->planes[n].texture = pl_tex_create(gpu, pl_tex_params( .w = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0), .h = AV_CEIL_RSHIFT(frame->height, is_chroma ? 
desc->log2_chroma_h : 0), .format = fmt, .sampleable = true, .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE, .import_handle = PL_HANDLE_DMA_BUF, .shared_mem = { .handle.fd = object->fd, .size = object->size, .offset = plane->offset, .drm_format_mod = object->format_modifier, .stride_w = plane->pitch, }, )); if (!out->planes[n].texture) return false; } pl_fix_hwframe_sample_depth(out, frame); return true; } // Derive a DMABUF from any other hwaccel format, and map that instead static bool pl_map_avframe_derived(pl_gpu gpu, struct pl_frame *out, const AVFrame *frame) { const int flags = AV_HWFRAME_MAP_READ | AV_HWFRAME_MAP_DIRECT; AVFrame *derived = av_frame_alloc(); derived->width = frame->width; derived->height = frame->height; derived->format = AV_PIX_FMT_DRM_PRIME; derived->hw_frames_ctx = av_buffer_ref(frame->hw_frames_ctx); if (av_hwframe_map(derived, frame, flags) < 0) goto error; if (av_frame_copy_props(derived, frame) < 0) goto error; if (!pl_map_avframe_drm(gpu, out, derived)) goto error; av_frame_free((AVFrame **) &out->user_data); out->user_data = derived; return true; error: av_frame_free(&derived); return false; } #ifdef HAVE_LAV_VULKAN static bool pl_map_avframe_vulkan(pl_gpu gpu, struct pl_frame *out, const AVFrame *frame) { const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); const AVVulkanFramesContext *vkfc = hwfc->hwctx; const VkFormat *vk_fmt = av_vkfmt_from_pixfmt(hwfc->sw_format); AVVkFrame *vkf = (AVVkFrame *) frame->data[0]; pl_vulkan vk = pl_vulkan_get(gpu); assert(frame->format == AV_PIX_FMT_VULKAN); if (!vk) return false; for (int n = 0; n < out->num_planes; n++) { pl_tex *tex = &out->planes[n].texture; bool chroma = n == 1 || n == 2; *tex = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params( .image = vkf->img[n], .width = AV_CEIL_RSHIFT(frame->width, chroma ? desc->log2_chroma_w : 0), .height = AV_CEIL_RSHIFT(frame->height, chroma ? 
desc->log2_chroma_h : 0), .format = vk_fmt[n], .usage = vkfc->usage, )); if (!*tex) return false; pl_vulkan_release(gpu, *tex, vkf->layout[n], (pl_vulkan_sem) { .sem = vkf->sem[n], .value = vkf->sem_value[n], }); } pl_fix_hwframe_sample_depth(out, frame); return true; } static void pl_unmap_avframe_vulkan(pl_gpu gpu, struct pl_frame *frame) { const AVFrame *avframe = frame->user_data; AVVkFrame *vkf = (AVVkFrame *) avframe->data[0]; int ok; for (int n = 0; n < frame->num_planes; n++) { pl_tex *tex = &frame->planes[n].texture; if (!*tex) continue; ok = pl_vulkan_hold_raw(gpu, *tex, &vkf->layout[n], (pl_vulkan_sem) { .sem = vkf->sem[n], .value = vkf->sem_value[n] + 1, }); vkf->access[n] = 0; vkf->sem_value[n] += !!ok; pl_tex_destroy(gpu, tex); } } #endif static inline bool pl_map_avframe_internal(pl_gpu gpu, struct pl_frame *out, const struct pl_avframe_params *params, bool can_alloc) { const AVFrame *frame = params->frame; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); struct pl_plane_data data[4] = {0}; struct pl_avalloc *alloc; pl_tex *tex = params->tex; int planes; pl_frame_from_avframe(out, frame); if (can_alloc) out->user_data = av_frame_clone(frame); #ifdef PL_HAVE_LAV_DOLBY_VISION if (can_alloc && params->map_dovi) { AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_METADATA); if (sd) { const AVDOVIMetadata *metadata = (AVDOVIMetadata *) sd->data; const AVDOVIColorMetadata *color = av_dovi_get_color(metadata); struct pl_dovi_metadata *dovi = malloc(sizeof(*dovi)); if (!dovi) goto error; // oom pl_map_dovi_metadata(dovi, metadata); out->repr.dovi = dovi; out->repr.sys = PL_COLOR_SYSTEM_DOLBYVISION; out->color.primaries = PL_COLOR_PRIM_BT_2020; out->color.transfer = PL_COLOR_TRC_PQ; out->color.hdr.min_luma = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, color->source_min_pq / 4095.0f); out->color.hdr.max_luma = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, color->source_max_pq / 4095.0f); } } #endif switch (frame->format) { case AV_PIX_FMT_DRM_PRIME: if (!pl_map_avframe_drm(gpu, out, frame)) goto error; return true; case AV_PIX_FMT_VAAPI: if (!pl_map_avframe_derived(gpu, out, frame)) goto error; return true; #ifdef HAVE_LAV_VULKAN case AV_PIX_FMT_VULKAN: if (!pl_map_avframe_vulkan(gpu, out, frame)) goto error; return true; #endif default: break; } // Backing textures are required from this point onwards if (!tex) goto error; planes = pl_plane_data_from_pixfmt(data, &out->repr.bits, frame->format); if (!planes) goto error; for (int p = 0; p < planes; p++) { bool is_chroma = p == 1 || p == 2; // matches lavu logic data[p].width = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0); data[p].height = AV_CEIL_RSHIFT(frame->height, is_chroma ? desc->log2_chroma_h : 0); data[p].row_stride = frame->linesize[p]; data[p].pixels = frame->data[p]; // Probe for frames allocated by pl_get_buffer2 alloc = frame->buf[p] ? 
av_buffer_get_opaque(frame->buf[p]) : NULL; if (alloc && alloc->magic[0] == PL_MAGIC0 && alloc->magic[1] == PL_MAGIC1) { data[p].pixels = NULL; data[p].buf = alloc->buf; data[p].buf_offset = (uintptr_t) frame->data[p] - (uintptr_t) alloc->buf->data; } else if (gpu->limits.callbacks) { // Use asynchronous upload if possible data[p].callback = pl_avframe_free_cb; data[p].priv = av_frame_clone(frame); } if (!pl_upload_plane(gpu, &out->planes[p], &tex[p], &data[p])) { av_frame_free((AVFrame **) &data[p].priv); goto error; } out->planes[p].texture = tex[p]; } return true; error: pl_unmap_avframe(gpu, out); return false; } static inline bool pl_map_avframe_ex(pl_gpu gpu, struct pl_frame *out_frame, const struct pl_avframe_params *params) { return pl_map_avframe_internal(gpu, out_frame, params, true); } static inline void pl_unmap_avframe(pl_gpu gpu, struct pl_frame *frame) { AVFrame *avframe = frame->user_data; const AVPixFmtDescriptor *desc; if (!avframe) goto done; #ifdef HAVE_LAV_VULKAN if (avframe->format == AV_PIX_FMT_VULKAN) pl_unmap_avframe_vulkan(gpu, frame); #endif desc = av_pix_fmt_desc_get(avframe->format); if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) { for (int i = 0; i < 4; i++) pl_tex_destroy(gpu, &frame->planes[i].texture); } av_frame_free(&avframe); done: memset(frame, 0, sizeof(*frame)); // sanity } static inline bool pl_upload_avframe(pl_gpu gpu, struct pl_frame *out_frame, pl_tex tex[4], const AVFrame *frame) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); struct pl_avframe_params params = { .frame = frame, .tex = tex, }; if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) return false; // requires allocation if (!pl_map_avframe_internal(gpu, out_frame, ¶ms, false)) return false; return true; } static void pl_done_cb(void *priv) { bool *status = priv; *status = true; } static inline bool pl_download_avframe(pl_gpu gpu, const struct pl_frame *frame, AVFrame *out_frame) { bool done[4] = {0}; if (frame->num_planes != av_pix_fmt_count_planes(out_frame->format)) return false; for (int p = 0; p < frame->num_planes; p++) { bool ok = pl_tex_download(gpu, pl_tex_transfer_params( .tex = frame->planes[p].texture, .row_pitch = out_frame->linesize[p], .ptr = out_frame->data[p], // Use synchronous transfer for the last plane .callback = (p+1) < frame->num_planes ? pl_done_cb : NULL, .priv = &done[p], )); if (!ok) return false; } for (int p = 0; p < frame->num_planes - 1; p++) { while (!done[p]) pl_tex_poll(gpu, frame->planes[p].texture, UINT64_MAX); } return true; } #define PL_ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) #define PL_MAX(x, y) ((x) > (y) ? (x) : (y)) static inline void pl_avalloc_free(void *opaque, uint8_t *data) { struct pl_avalloc *alloc = opaque; assert(alloc->magic[0] == PL_MAGIC0); assert(alloc->magic[1] == PL_MAGIC1); assert(alloc->buf->data == data); pl_buf_destroy(alloc->gpu, &alloc->buf); free(alloc); } static inline int pl_get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flags) { int alignment[AV_NUM_DATA_POINTERS]; int width = pic->width; int height = pic->height; size_t planesize[4]; int ret = 0; pl_gpu *pgpu = avctx->opaque; pl_gpu gpu = pgpu ? 
*pgpu : NULL; struct pl_plane_data data[4]; struct pl_avalloc *alloc; int planes = pl_plane_data_from_pixfmt(data, NULL, pic->format); // Sanitize frame structs memset(pic->data, 0, sizeof(pic->data)); memset(pic->linesize, 0, sizeof(pic->linesize)); memset(pic->buf, 0, sizeof(pic->buf)); pic->extended_data = pic->data; pic->extended_buf = NULL; if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1) || !planes) goto fallback; if (!gpu || !gpu->limits.thread_safe || !gpu->limits.max_mapped_size) goto fallback; avcodec_align_dimensions2(avctx, &width, &height, alignment); if ((ret = av_image_fill_linesizes(pic->linesize, pic->format, width))) return ret; for (int p = 0; p < 4; p++) { alignment[p] = PL_ALIGN2(alignment[p], gpu->limits.align_tex_xfer_pitch); alignment[p] = PL_ALIGN2(alignment[p], gpu->limits.align_tex_xfer_offset); pic->linesize[p] = PL_ALIGN2(pic->linesize[p], alignment[p]); } #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100) ret = av_image_fill_plane_sizes(planesize, pic->format, height, (ptrdiff_t[4]) { pic->linesize[0], pic->linesize[1], pic->linesize[2], pic->linesize[3], }); if (ret < 0) return ret; #else uint8_t *ptrs[4], * const base = (uint8_t *) 0x10000; ret = av_image_fill_pointers(ptrs, pic->format, height, base, pic->linesize); if (ret < 0) return ret; for (int p = 0; p < 4; p++) planesize[p] = (uintptr_t) ptrs[p] - (uintptr_t) base; #endif for (int p = 0; p < planes; p++) { const size_t buf_size = planesize[p] + alignment[p]; if (buf_size > gpu->limits.max_mapped_size) { av_frame_unref(pic); goto fallback; } alloc = malloc(sizeof(*alloc)); if (!alloc) { av_frame_unref(pic); return AVERROR(ENOMEM); } *alloc = (struct pl_avalloc) { .magic = { PL_MAGIC0, PL_MAGIC1 }, .gpu = gpu, .buf = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .memory_type = PL_BUF_MEM_HOST, .host_mapped = true, )), }; if (!alloc->buf) { free(alloc); av_frame_unref(pic); return AVERROR(ENOMEM); } pic->data[p] = (uint8_t *) PL_ALIGN2((uintptr_t) alloc->buf->data, alignment[p]); pic->buf[p] = av_buffer_create(alloc->buf->data, buf_size, pl_avalloc_free, alloc, 0); if (!pic->buf[p]) { free(alloc); pl_buf_destroy(gpu, &alloc->buf); av_frame_unref(pic); return AVERROR(ENOMEM); } } return 0; fallback: return avcodec_default_get_buffer2(avctx, pic, flags); } #undef PL_MAGIC0 #undef PL_MAGIC1 #undef PL_ALIGN #undef PL_MAX #endif // LIBPLACEBO_LIBAV_H_ libplacebo-v4.192.1/src/include/libplacebo/utils/upload.h000066400000000000000000000161411417677245700233040ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_UPLOAD_H_ #define LIBPLACEBO_UPLOAD_H_ #include #include #include PL_API_BEGIN // This file contains a utility function to assist in uploading data from host // memory to a texture. In particular, the texture will be suitable for use as // a `pl_plane`. 
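// As a quick orientation, here is a rough usage sketch (purely illustrative;
// the concrete plane layout, as well as `my_gpu` and `my_pixels`, are
// hypothetical placeholders and not part of this API):
//
//     // Packed 8-bit RGB data, tightly packed in host memory
//     struct pl_plane_data data = {
//         .type           = PL_FMT_UNORM,
//         .width          = 1920,
//         .height         = 1080,
//         .component_size = {8, 8, 8},    // bits per component
//         .component_map  = {0, 1, 2},    // R, G, B
//         .pixel_stride   = 3,            // bytes between pixels
//         .pixels         = my_pixels,
//     };
//
//     struct pl_plane plane;
//     pl_tex tex = NULL;
//     if (!pl_upload_plane(my_gpu, &plane, &tex, &data))
//         abort(); // handle the error gracefully in real code
//
//     // ... render using `plane`, then eventually:
//     pl_tex_destroy(my_gpu, &tex);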
// Description of the host representation of an image plane struct pl_plane_data { enum pl_fmt_type type; // meaning of the data (must not be UINT or SINT) int width, height; // dimensions of the plane int component_size[4]; // size in bits of each coordinate int component_pad[4]; // ignored bits preceding each component int component_map[4]; // semantic meaning of each component (pixel order) size_t pixel_stride; // offset in bytes between pixels (required) size_t row_stride; // offset in bytes between rows (optional) // Similar to `pl_tex_transfer_params`, you can either upload from a raw // pointer address, or a buffer + offset. Again, the use of these two // mechanisms is mutually exclusive. // // 1. Uploading from host memory const void *pixels; // the actual data underlying this plane // 2. Uploading from a buffer (requires `pl_gpu_limits.buf_transfer`) pl_buf buf; // the buffer to use size_t buf_offset; // offset of data within buffer, must be a // multiple of `pixel_stride` as well as of 4 // Similar to `pl_tex_transfer_params.callback`, this allows turning the // upload of a plane into an asynchronous upload. The same notes apply. void (*callback)(void *priv); void *priv; // Note: When using this together with `pl_frame`, there is some amount of // overlap between `component_pad` and `pl_color_repr.bits`. Some key // differences between the two: // // - the bits from `component_pad` are ignored; whereas the superfluous bits // in a `pl_color_repr` must be 0. // - the `component_pad` exists to align the component size and placement // with the capabilities of GPUs; the `pl_color_repr` exists to control // the semantics of the color samples on a finer granularity. // - the `pl_color_repr` applies to the color sample as a whole, and // therefore applies to all planes; the `component_pad` can be different // for each plane. // - `component_pad` interacts with float textures by moving the actual // float in memory. `pl_color_repr` interacts with float data as if // the float was converted from an integer under full range semantics. // // To help establish the motivating difference, a typical example of a use // case would be yuv420p10. Since 10-bit GPU texture support is limited, // and working with non-byte-aligned pixels is awkward in general, the // convention is to represent yuv420p10 as 16-bit samples with either the // high or low bits set to 0. In this scenario, the `component_size` of the // `pl_plane_data` and `pl_bit_encoding.sample_depth` would be 16, while // the `pl_bit_encoding.color_depth` would be 10 (and additionally, the // `pl_bit_encoding.bit_shift` would be either 0 or 6, depending on // whether the low or the high bits are used). // // On the contrary, something like a packed, 8-bit XBGR format (where the // X bits are ignored and may contain garbage) would set `component_pad[0]` // to 8, and the component_size[0:2] (respectively) to 8 as well. // // As a general rule of thumb, for maximum compatibility, you should try // and align component_size/component_pad to multiples of 8 and explicitly // clear any remaining superfluous bits (+ use `pl_color_repr.bits` to // ensure they're decoded correctly). You should also try to align the // `pixel_stride` to a power of two. }; // Fills in the `component_size`, `component_pad` and `component_map` fields // based on the supplied mask for each component (in semantic order, i.e. // RGBA). Each element of `mask` must have a contiguous range of set bits. 
void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4]); // Helper function to take a `pl_plane_data` struct and try and improve its // alignment to make it more likely to correspond to a real `pl_fmt`. It does // this by attempting to round each component up to the nearest byte boundary. // This relies on the assumption (true in practice) that superfluous bits of // byte-misaligned formats are explicitly set to 0. // // The resulting shift must be consistent across all components, in which case // it's returned in `out_bits`. If no alignment was possible, `out_bits` is set // to {0}, and this function returns false. bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits); // Helper function to find a suitable `pl_fmt` based on a pl_plane_data's // requirements. This is called internally by `pl_upload_plane`, but it's // exposed to users both as a convenience and so they may pre-emptively check // if a format would be supported without actually having to attempt the upload. pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data); // Upload an image plane to a texture, and output the resulting `pl_plane` // struct to `out_plane` (optional). `tex` must be a valid pointer to a texture // (or NULL), which will be destroyed and reinitialized if it does not already // exist or is incompatible. Returns whether successful. // // The resulting texture is guaranteed to be `sampleable`, and it will also try // and maximize compatibility with the other `pl_renderer` requirements // (blittable, linear filterable, etc.). // // Note: `out_plane->shift_x/y` are left uninitialized, and should be set // explicitly by the user. bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane, pl_tex *tex, const struct pl_plane_data *data); // Like `pl_upload_plane`, but only creates an uninitialized texture object // rather than actually performing an upload. This can be useful to, for // example, prepare textures to be used as the target of rendering. // // The resulting texture is guaranteed to be `renderable`, and it will also try // to maximize compatibility with the other `pl_renderer` requirements // (blittable, storable, etc.). bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane, pl_tex *tex, const struct pl_plane_data *data); PL_API_END #endif // LIBPLACEBO_UPLOAD_H_ libplacebo-v4.192.1/src/include/libplacebo/vulkan.h000066400000000000000000000566241417677245700221720ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_VULKAN_H_ #define LIBPLACEBO_VULKAN_H_ #include #include #include PL_API_BEGIN // Structure representing a VkInstance. Using this is not required. typedef const PL_STRUCT(pl_vk_inst) { VkInstance instance; // The Vulkan API version supported by this VkInstance. uint32_t api_version; // The associated vkGetInstanceProcAddr pointer. 
PFN_vkGetInstanceProcAddr get_proc_addr; // The instance extensions that were successfully enabled, including // extensions enabled by libplacebo internally. May contain duplicates. const char * const *extensions; int num_extensions; // The instance layers that were successfully enabled, including // layers enabled by libplacebo internally. May contain duplicates. const char * const *layers; int num_layers; } *pl_vk_inst; struct pl_vk_inst_params { // If set, enable the debugging and validation layers. These should // generally be lightweight and relatively harmless to enable. bool debug; // If set, also enable GPU-assisted verification and best practices // layers. (Note: May cause substantial slowdown and/or result in lots of // false positive spam) bool debug_extra; // If nonzero, restricts the Vulkan API version to be at most this. This // is only really useful for explicitly testing backwards compatibility. uint32_t max_api_version; // Pointer to a user-provided `vkGetInstanceProcAddr`. If this is NULL, // libplacebo will use the directly linked version (if available). PFN_vkGetInstanceProcAddr get_proc_addr; // Enables extra instance extensions. Instance creation will fail if these // extensions are not all supported. The user may use this to enable e.g. // windowing system integration. const char * const *extensions; int num_extensions; // Enables extra optional instance extensions. These are opportunistically // enabled if supported by the device, but otherwise skipped. const char * const *opt_extensions; int num_opt_extensions; // Enables extra layers. Instance creation will fail if these layers are // not all supported. // // NOTE: Layers needed for required/optional extensions are automatically // enabled. The user does not specifically need to enable layers related // to extension support. const char * const *layers; int num_layers; // Enables extra optional layers. These are opportunistically enabled if // supported by the platform, but otherwise skipped. const char * const *opt_layers; int num_opt_layers; }; #define pl_vk_inst_params(...) (&(struct pl_vk_inst_params) { __VA_ARGS__ }) extern const struct pl_vk_inst_params pl_vk_inst_default_params; // Helper function to simplify instance creation. The user could also bypass // these helpers and do it manually, but this function is provided as a // convenience. It also sets up a debug callback which forwards all vulkan // messages to the `pl_log` callback. pl_vk_inst pl_vk_inst_create(pl_log log, const struct pl_vk_inst_params *params); void pl_vk_inst_destroy(pl_vk_inst *inst); struct pl_vulkan_queue { uint32_t index; // Queue family index uint32_t count; // Queue family count }; // Structure representing the actual vulkan device and associated GPU instance typedef const PL_STRUCT(pl_vulkan) { pl_gpu gpu; // The vulkan objects in use. The user may use this for their own purposes, // but please note that the lifetime is tied to the lifetime of the // pl_vulkan object, and must not be destroyed by the user. Note that the // created vulkan device may have any number of queues and queue family // assignments; so using it for queue submission commands is ill-advised. VkInstance instance; VkPhysicalDevice phys_device; VkDevice device; // The Vulkan API version supported by this VkPhysicalDevice. uint32_t api_version; // The device extensions that were successfully enabled, including // extensions enabled by libplacebo internally. May contain duplicates. 
const char * const *extensions; int num_extensions; // The device features that were enabled at device creation time. const VkPhysicalDeviceFeatures2 *features; // The explicit queue families we are using to provide a given capability, // or {0} if no appropriate dedicated queue family exists for this // operation type. // // It's guaranteed that `queue_graphics` is always set, but the existence // of the other two is optional, and libplacebo will only set them if // they are different from the graphics queue. Note that queue_compute // and queue_transfer may refer to the same queue family index. struct pl_vulkan_queue queue_graphics; // provides VK_QUEUE_GRAPHICS_BIT struct pl_vulkan_queue queue_compute; // provides VK_QUEUE_COMPUTE_BIT struct pl_vulkan_queue queue_transfer; // provides VK_QUEUE_TRANSFER_BIT // For convenience, these are the same enabled queue families and their // queue counts in list form. This list does not contain duplicates. const struct pl_vulkan_queue *queues; int num_queues; } *pl_vulkan; struct pl_vulkan_params { // The vulkan instance. Optional, if NULL then libplacebo will internally // create a VkInstance with the settings from `instance_params`. // // Note: The VkInstance provided by the user *MUST* be created with a // VkApplicationInfo.apiVersion of VK_API_VERSION_1_1 or higher. VkInstance instance; // Pointer to `vkGetInstanceProcAddr`. If this is NULL, libplacebo will // use the directly linked version (if available). // // Note: This overwrites the same value from `instance_params`. PFN_vkGetInstanceProcAddr get_proc_addr; // Configures the settings used for creating an internal vulkan instance. // May be NULL. Ignored if `instance` is set. const struct pl_vk_inst_params *instance_params; // When choosing the device, rule out all devices that don't support // presenting to this surface. When creating a device, enable all extensions // needed to ensure we can present to this surface. Optional. Only legal // when specifying an existing VkInstance to use. VkSurfaceKHR surface; // --- Physical device selection options // The vulkan physical device. May be set by the caller to indicate the // physical device to use. Otherwise, libplacebo will pick the "best" // available GPU, based on the advertised device type. (i.e., it will // prefer discrete GPUs over integrated GPUs). Only legal when specifying // an existing VkInstance to use. VkPhysicalDevice device; // When choosing the device, only choose a device with this exact name. // This overrides `allow_software`. No effect if `device` is set. Note: A // list of devices and their names are logged at level PL_LOG_INFO. const char *device_name; // When choosing the device, only choose a device with this exact UUID. // This overrides `allow_software` and `device_name`. No effect if `device` // is set. uint8_t device_uuid[16]; // When choosing the device, controls whether or not to also allow software // GPUs. No effect if `device` or `device_name` are set. bool allow_software; // --- Logical device creation options // Controls whether or not to allow asynchronous transfers, using transfer // queue families, if supported by the device. This can be significantly // faster and more power efficient, and also allows streaming uploads in // parallel with rendering commands. Enabled by default. bool async_transfer; // Controls whether or not to allow asynchronous compute, using dedicated // compute queue families, if supported by the device. 
On some devices, // these can allow the GPU to schedule compute shaders in parallel with // fragment shaders. Enabled by default. bool async_compute; // Limits the number of queues to request. If left as 0, this will enable // as many queues as the device supports. Multiple queues can result in // improved efficiency when submitting multiple commands that can entirely // or partially execute in parallel. Defaults to 1, since using more queues // can actually decrease performance. int queue_count; // Enables extra device extensions. Device creation will fail if these // extensions are not all supported. The user may use this to enable e.g. // interop extensions. const char * const *extensions; int num_extensions; // Enables extra optional device extensions. These are opportunistically // enabled if supported by the device, but otherwise skipped. const char * const *opt_extensions; int num_opt_extensions; // Optional extra features to enable at device creation time. These are // opportunistically enabled if supported by the physical device, but // otherwise kept disabled. Users may include extra extension-specific // features in the pNext chain, however these *must* all be // extension-specific structs, i.e. the use of "meta-structs" like // VkPhysicalDeviceVulkan11Features is not allowed. const VkPhysicalDeviceFeatures2 *features; // --- Misc/debugging options // Restrict specific features to e.g. work around driver bugs, or simply // for testing purposes int max_glsl_version; // limit the maximum GLSL version uint32_t max_api_version; // limit the maximum vulkan API version // Removed parameters (no effect) bool disable_events PL_DEPRECATED; }; // Default/recommended parameters. Should generally be safe and efficient. #define PL_VULKAN_DEFAULTS \ .async_transfer = true, \ .async_compute = true, \ /* enabling multiple queues often decreases perf */ \ .queue_count = 1, #define pl_vulkan_params(...) (&(struct pl_vulkan_params) { PL_VULKAN_DEFAULTS __VA_ARGS__ }) extern const struct pl_vulkan_params pl_vulkan_default_params; // Creates a new vulkan device based on the given parameters and initializes // a new GPU. This function will internally initialize a VkDevice. There is // currently no way to share a vulkan device with the caller. If `params` is // left as NULL, it defaults to &pl_vulkan_default_params. // // Thread-safety: Safe pl_vulkan pl_vulkan_create(pl_log log, const struct pl_vulkan_params *params); // Destroys the vulkan device and all associated objects, except for the // VkInstance provided by the user. // // Note that all resources allocated from this vulkan object (e.g. via the // `vk->ra` or using `pl_vulkan_create_swapchain`) *must* be explicitly // destroyed by the user before calling this. // // Also note that this function will block until all in-flight GPU commands are // finished processing. You can avoid this by manually calling `pl_gpu_finish` // before `pl_vulkan_destroy`. void pl_vulkan_destroy(pl_vulkan *vk); // For a `pl_gpu` backed by `pl_vulkan`, this function can be used to retrieve // the underlying `pl_vulkan`. Returns NULL for any other type of `gpu`. pl_vulkan pl_vulkan_get(pl_gpu gpu); struct pl_vulkan_device_params { // The instance to use. Required! // // Note: The VkInstance provided by the user *must* be created with a // VkApplicationInfo.apiVersion of VK_API_VERSION_1_1 or higher. VkInstance instance; // Mirrored from `pl_vulkan_params`. All of these fields are optional. 
PFN_vkGetInstanceProcAddr get_proc_addr; VkSurfaceKHR surface; const char *device_name; uint8_t device_uuid[16]; bool allow_software; }; #define pl_vulkan_device_params(...) (&(struct pl_vulkan_device_params) { __VA_ARGS__ }) // Helper function to choose the best VkPhysicalDevice, given a VkInstance. // This uses the same logic as `pl_vulkan_create` uses internally. If no // matching device was found, this returns VK_NULL_HANDLE. VkPhysicalDevice pl_vulkan_choose_device(pl_log log, const struct pl_vulkan_device_params *params); struct pl_vulkan_swapchain_params { // The surface to use for rendering. Required, the user is in charge of // creating this. Must belong to the same VkInstance as `vk->instance`. VkSurfaceKHR surface; // When choosing the initial surface format, prefer HDR formats over SDR // formats, if any is available. // // Deprecated in favor of `pl_swapchain_colorspace_hint`, which overrides // this parameter if called. bool prefer_hdr PL_DEPRECATED; // The preferred presentation mode. See the vulkan documentation for more // information about these. If the device/surface combination does not // support this mode, libplacebo will fall back to VK_PRESENT_MODE_FIFO_KHR. // // Warning: Leaving this zero-initialized is the same as having specified // VK_PRESENT_MODE_IMMEDIATE_KHR, which is probably not what the user // wants! VkPresentModeKHR present_mode; // Allow up to N in-flight frames. This essentially controls how many // rendering commands may be queued up at the same time. See the // documentation for `pl_swapchain_get_latency` for more information. For // vulkan specifically, we are only able to wait until the GPU has finished // rendering a frame - we are unable to wait until the display has actually // finished displaying it. So this only provides a rough guideline. // Optional, defaults to 3. int swapchain_depth; // This suppresses automatic recreation of the swapchain when any call // returns VK_SUBOPTIMAL_KHR. Normally, libplacebo will recreate the // swapchain internally on the next `pl_swapchain_start_frame`. If enabled, // clients are assumed to take care of swapchain recreations themselves, by // calling `pl_swapchain_resize` as appropriate. libplacebo will tolerate // the "suboptimal" status indefinitely. bool allow_suboptimal; }; #define pl_vulkan_swapchain_params(...) (&(struct pl_vulkan_swapchain_params) { __VA_ARGS__ }) // Creates a new vulkan swapchain based on an existing VkSurfaceKHR. Using this // function requires that the vulkan device was created with the // VK_KHR_swapchain extension. The easiest way of accomplishing this is to set // the `pl_vulkan_params.surface` explicitly at creation time. pl_swapchain pl_vulkan_create_swapchain(pl_vulkan vk, const struct pl_vulkan_swapchain_params *params); // This will return true if the vulkan swapchain is internally detected // as being suboptimal (VK_SUBOPTIMAL_KHR). This might be of use to clients // who have `params->allow_suboptimal` enabled. bool pl_vulkan_swapchain_suboptimal(pl_swapchain sw); // Vulkan interop API, for sharing a single VkDevice (and associated vulkan // resources) directly with the API user. The use of this API is a bit sketchy // and requires careful communication of Vulkan API state. struct pl_vulkan_import_params { // The vulkan instance. Required. // // Note: The VkInstance provided by the user *must* be created with a // VkApplicationInfo.apiVersion of VK_API_VERSION_1_1 or higher. VkInstance instance; // Pointer to `vkGetInstanceProcAddr`. 
If this is NULL, libplacebo will // use the directly linked version (if available). PFN_vkGetInstanceProcAddr get_proc_addr; // The physical device selected by the user. Required. VkPhysicalDevice phys_device; // The logical device created by the user. Required. VkDevice device; // --- Logical device parameters // List of all device-level extensions that were enabled. (Instance-level // extensions need not be re-specified here, since it's guaranteed that any // instance-level extensions that device-level extensions depend on were // enabled at the instance level) const char * const *extensions; int num_extensions; // Enabled queue families. At least `queue_graphics` is required. // // It's okay for multiple queue families to be specified with the same // index, e.g. in the event that a dedicated compute queue also happens to // be the dedicated transfer queue. // // It's also okay to leave the queue struct as {0} in the event that no // dedicated queue exists for a given operation type. libplacebo will // automatically fall back to using e.g. the graphics queue instead. struct pl_vulkan_queue queue_graphics; // must support VK_QUEUE_GRAPHICS_BIT struct pl_vulkan_queue queue_compute; // must support VK_QUEUE_COMPUTE_BIT struct pl_vulkan_queue queue_transfer; // must support VK_QUEUE_TRANSFER_BIT // Enabled VkPhysicalDeviceFeatures. The VkDevice provided by the user // *must* be created with the `timelineSemaphore` feature enabled. const VkPhysicalDeviceFeatures2 *features; // --- Misc/debugging options // Restrict specific features to e.g. work around driver bugs, or simply // for testing purposes. See `pl_vulkan_params` for a description of these. int max_glsl_version; uint32_t max_api_version; // Removed parameters (no effect) bool disable_events PL_DEPRECATED; }; #define pl_vulkan_import_params(...) (&(struct pl_vulkan_import_params) { __VA_ARGS__ }) // Import an existing VkDevice instead of creating a new one, and wrap it into // a `pl_vulkan` abstraction. It's safe to `pl_vulkan_destroy` this, which will // destroy application state related to libplacebo but leave the underlying // VkDevice intact. pl_vulkan pl_vulkan_import(pl_log log, const struct pl_vulkan_import_params *params); struct pl_vulkan_wrap_params { // The image itself. It *must* be usable concurrently by all of the queue // family indices listed in `pl_vulkan->queues`. Note that this requires // the use of VK_SHARING_MODE_CONCURRENT if `pl_vulkan->num_queues` is // greater than 1. If this is difficult to achieve for the user, then // `async_transfer` / `async_compute` should be turned off, which // guarantees the use of only one queue family. VkImage image; // The image's dimensions (unused dimensions must be 0) int width; int height; int depth; // The image's format. libplacebo will try to map this to an equivalent // pl_fmt. If no compatible pl_fmt is found, wrapping will fail. VkFormat format; // The usage flags the image was created with. libplacebo will set the // pl_tex capabilities to include whatever it can, as determined by the set // of enabled usage flags. VkImageUsageFlags usage; // See `pl_tex_params` void *user_data; pl_debug_tag debug_tag; }; #define pl_vulkan_wrap_params(...) (&(struct pl_vulkan_wrap_params) { \ .debug_tag = PL_DEBUG_TAG, \ __VA_ARGS__ \ }) // Wraps an external VkImage into a pl_tex abstraction. By default, the image // is considered "held" by the user and must be released before calling any // pl_tex_* API calls on it (see `pl_vulkan_release`). 
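//
// A rough sketch of the intended hold/release cycle (purely illustrative;
// `gpu`, `vk_image`, the image parameters and `sem` below are hypothetical
// placeholders, and error handling is omitted):
//
//     pl_tex tex = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
//         .image  = vk_image,
//         .width  = width,
//         .height = height,
//         .format = VK_FORMAT_R8G8B8A8_UNORM,
//         .usage  = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
//     ));
//
//     // The image starts out "held"; release it so libplacebo may use it
//     pl_vulkan_release(gpu, tex, VK_IMAGE_LAYOUT_UNDEFINED, (pl_vulkan_sem) {0});
//
//     // ... pl_tex_* operations / rendering ...
//
//     // Re-acquire the image before touching the VkImage from application
//     // code again (wait on `sem` before actually using it)
//     pl_vulkan_hold(gpu, tex, VK_IMAGE_LAYOUT_GENERAL, sem);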
// // This wrapper can be destroyed by simply calling `pl_tex_destroy` on it, // which will not destroy the underlying VkImage. If a pl_tex wrapper is // destroyed while an image is not currently being held by the user, that // image is left in an undefined state. // // Wrapping the same VkImage multiple times is undefined behavior, as is trying // to wrap an image belonging to a different VkDevice than the one in use by // `gpu`. // // This function may fail, in which case it returns NULL. pl_tex pl_vulkan_wrap(pl_gpu gpu, const struct pl_vulkan_wrap_params *params); // For purely informative reasons, this contains a list of extensions and // device features that libplacebo *can* make use of. These are all strictly // optional, but provide a hint to the API user as to what might be worth // enabling at device creation time. // // Note: This also includes physical device features provided by extensions. // They are all provided using extension-specific features structs, rather // than the more general purpose VkPhysicalDeviceVulkan11Features etc. extern const char * const pl_vulkan_recommended_extensions[]; extern const int pl_vulkan_num_recommended_extensions; extern const VkPhysicalDeviceFeatures2 pl_vulkan_recommended_features; // Analogous to `pl_vulkan_wrap`, this function takes any `pl_tex` (including // ones created by `pl_tex_create`) and unwraps it to expose the underlying // VkImage to the user. Unlike `pl_vulkan_wrap`, this `pl_tex` is *not* // considered held after calling this function - the user must explicitly // `pl_vulkan_hold` before accessing the VkImage. // // `out_format` and `out_flags` will be updated to hold the VkImage's // format and usage flags. (Optional) VkImage pl_vulkan_unwrap(pl_gpu gpu, pl_tex tex, VkFormat *out_format, VkImageUsageFlags *out_flags); // Represents a vulkan semaphore/value pair (for compatibility with timeline // semaphores). When using normal, binary semaphores, `value` may be ignored. typedef struct pl_vulkan_sem { VkSemaphore sem; uint64_t value; } pl_vulkan_sem; // "Hold" a shared image. This will transition the image into the layout // specified by the user, and fire the given semaphore (required) when this is // done. This marks the image as held. Attempting to perform any pl_tex_* // operation (except pl_tex_destroy) on a held image is undefined behavior. // // Returns whether successful. bool pl_vulkan_hold(pl_gpu gpu, pl_tex tex, VkImageLayout layout, pl_vulkan_sem sem_out); // This function is similar to `pl_vulkan_hold`, except that rather than // forcibly transitioning to a given layout, the user is instead informed about // the current layout and is in charge of transitioning it to their own layout // before using it. May be more convenient for some users. // // Returns whether successful. bool pl_vulkan_hold_raw(pl_gpu gpu, pl_tex tex, VkImageLayout *layout, pl_vulkan_sem sem_out); // "Release" a shared image, meaning it is no longer held. `layout` describes // the current layout of the image at the point in time when the user is // releasing it. Performing any operation on the VkImage underlying this // `pl_tex` while it is not being held by the user is undefined behavior. // // If `sem_in` is specified, it must fire before libplacebo will actually use // or modify the image. (Optional) // // Note: the lifetime of `sem_in` is indeterminate, and destroying it while the // texture is still depending on that semaphore is undefined behavior. 
// // Technically, the only way to be sure that it's safe to free is to use // `pl_gpu_finish()` or similar (e.g. `pl_vulkan_destroy` or // `vkDeviceWaitIdle`) after another operation involving `tex` has been emitted // (or the texture has been destroyed). void pl_vulkan_release(pl_gpu gpu, pl_tex tex, VkImageLayout layout, pl_vulkan_sem sem_in); PL_API_END #endif // LIBPLACEBO_VULKAN_H_ libplacebo-v4.192.1/src/log.c000066400000000000000000000170041417677245700157140ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "common.h" #include "log.h" #include "pl_thread.h" struct priv { pl_mutex lock; enum pl_log_level log_level_cap; pl_str logbuffer; }; pl_log pl_log_create(int api_ver, const struct pl_log_params *params) { if (api_ver != PL_API_VER) { fprintf(stderr, "*************************************************************\n" "libplacebo: ABI mismatch detected! (requested: %d, compiled: %d)\n" "\n" "This is usually indicative of a linking mismatch, and will\n" "result in serious issues including stack corruption, random\n" "crashes and arbitrary code execution. Aborting as a safety\n" "precaution. Fix your system!\n", api_ver, PL_API_VER); abort(); } struct pl_log *log = pl_zalloc_obj(NULL, log, struct priv); struct priv *p = PL_PRIV(log); log->params = *PL_DEF(params, &pl_log_default_params); pl_mutex_init(&p->lock); pl_info(log, "Initialized libplacebo %s (API v%d)", PL_VERSION, PL_API_VER); return log; } const struct pl_log_params pl_log_default_params = {0}; void pl_log_destroy(pl_log *plog) { pl_log log = *plog; if (!log) return; struct priv *p = PL_PRIV(log); pl_mutex_destroy(&p->lock); pl_free((void *) log); *plog = NULL; } struct pl_log_params pl_log_update(pl_log ptr, const struct pl_log_params *params) { struct pl_log *log = (struct pl_log *) ptr; if (!log) return pl_log_default_params; struct priv *p = PL_PRIV(log); pl_mutex_lock(&p->lock); struct pl_log_params prev_params = log->params; log->params = *PL_DEF(params, &pl_log_default_params); pl_mutex_unlock(&p->lock); return prev_params; } enum pl_log_level pl_log_level_update(pl_log ptr, enum pl_log_level level) { struct pl_log *log = (struct pl_log *) ptr; if (!log) return PL_LOG_NONE; struct priv *p = PL_PRIV(log); pl_mutex_lock(&p->lock); enum pl_log_level prev_level = log->params.log_level; log->params.log_level = level; pl_mutex_unlock(&p->lock); return prev_level; } void pl_log_level_cap(pl_log log, enum pl_log_level cap) { if (!log) return; struct priv *p = PL_PRIV(log); pl_mutex_lock(&p->lock); p->log_level_cap = cap; pl_mutex_unlock(&p->lock); } static FILE *default_stream(void *stream, enum pl_log_level level) { return PL_DEF(stream, level <= PL_LOG_WARN ? 
stderr : stdout); } void pl_log_simple(void *stream, enum pl_log_level level, const char *msg) { static const char *prefix[] = { [PL_LOG_FATAL] = "fatal", [PL_LOG_ERR] = "error", [PL_LOG_WARN] = "warn", [PL_LOG_INFO] = "info", [PL_LOG_DEBUG] = "debug", [PL_LOG_TRACE] = "trace", }; FILE *h = default_stream(stream, level); fprintf(h, "%5s: %s\n", prefix[level], msg); if (level <= PL_LOG_WARN) fflush(h); } void pl_log_color(void *stream, enum pl_log_level level, const char *msg) { static const char *color[] = { [PL_LOG_FATAL] = "31;1", // bright red [PL_LOG_ERR] = "31", // red [PL_LOG_WARN] = "33", // yellow/orange [PL_LOG_INFO] = "32", // green [PL_LOG_DEBUG] = "34", // blue [PL_LOG_TRACE] = "30;1", // bright black }; FILE *h = default_stream(stream, level); fprintf(h, "\033[%sm%s\033[0m\n", color[level], msg); if (level <= PL_LOG_WARN) fflush(h); } static void pl_msg_va(pl_log log, enum pl_log_level lev, const char *fmt, va_list va) { // Test log message without taking the lock, to avoid thrashing the // lock for thousands of trace messages unless those are actually // enabled. This may be a false negative, in which case log messages may // be lost as a result. But this shouldn't be a big deal, since any // situation leading to lost log messages would itself be a race condition. if (!pl_msg_test(log, lev)) return; // Re-test the log message level with held lock to avoid false positives, // which would be a considerably bigger deal than false negatives struct priv *p = PL_PRIV(log); pl_mutex_lock(&p->lock); // Apply this cap before re-testing the log level, to avoid giving users // messages that should have been dropped by the log level. lev = PL_MAX(lev, p->log_level_cap); if (!pl_msg_test(log, lev)) goto done; p->logbuffer.len = 0; pl_str_append_vasprintf((void *) log, &p->logbuffer, fmt, va); log->params.log_cb(log->params.log_priv, lev, (char *) p->logbuffer.buf); done: pl_mutex_unlock(&p->lock); } void pl_msg(pl_log log, enum pl_log_level lev, const char *fmt, ...) 
{ va_list va; va_start(va, fmt); pl_msg_va(log, lev, fmt, va); va_end(va); } void pl_msg_source(pl_log log, enum pl_log_level lev, const char *src) { if (!pl_msg_test(log, lev) || !src) return; int line = 1; while (*src) { const char *end = strchr(src, '\n'); if (!end) { pl_msg(log, lev, "[%3d] %s", line, src); break; } pl_msg(log, lev, "[%3d] %.*s", line, (int)(end - src), src); src = end + 1; line++; } } #ifdef PL_HAVE_UNWIND #define UNW_LOCAL_ONLY #include #include void pl_log_stack_trace(pl_log log, enum pl_log_level lev) { if (!pl_msg_test(log, lev)) return; unw_cursor_t cursor; unw_context_t uc; unw_word_t ip, off; unw_getcontext(&uc); unw_init_local(&cursor, &uc); int depth = 0; pl_msg(log, lev, " Backtrace:"); while (unw_step(&cursor) > 0) { char symbol[256] = ""; Dl_info info = { .dli_fname = "", }; unw_get_reg(&cursor, UNW_REG_IP, &ip); unw_get_proc_name(&cursor, symbol, sizeof(symbol), &off); dladdr((void *) (uintptr_t) ip, &info); pl_msg(log, lev, " #%-2d 0x%016" PRIxPTR " in %s+0x%" PRIxPTR" at %s+0x%" PRIxPTR, depth++, ip, symbol, off, info.dli_fname, ip - (uintptr_t) info.dli_fbase); } } #elif defined(PL_HAVE_EXECINFO) && !defined(MSAN) #include void pl_log_stack_trace(pl_log log, enum pl_log_level lev) { if (!pl_msg_test(log, lev)) return; PL_ARRAY(void *) buf = {0}; size_t buf_avail = 16; do { buf_avail *= 2; PL_ARRAY_RESIZE(NULL, buf, buf_avail); buf.num = backtrace(buf.elem, buf_avail); } while (buf.num == buf_avail); pl_msg(log, lev, " Backtrace:"); char **strings = backtrace_symbols(buf.elem, buf.num); for (int i = 0; i < buf.num; i++) pl_msg(log, lev, " #%-2d %s", i, strings[i]); free(strings); pl_free(buf.elem); } #else void pl_log_stack_trace(pl_log log, enum pl_log_level lev) { } #endif libplacebo-v4.192.1/src/log.h000066400000000000000000000064161417677245700157260ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include "common.h" // Internal logging-related functions // Warning: Not entirely thread-safe. Exercise caution when using. May result // in either false positives or false negatives. Make sure to re-run this // function while `lock` is held, to ensure no race conditions on the check. static inline bool pl_msg_test(pl_log log, enum pl_log_level lev) { return log && log->params.log_cb && log->params.log_level >= lev; } void pl_msg(pl_log log, enum pl_log_level lev, const char *fmt, ...) PL_PRINTF(3, 4); // Convenience macros #define pl_fatal(log, ...) pl_msg(log, PL_LOG_FATAL, __VA_ARGS__) #define pl_err(log, ...) pl_msg(log, PL_LOG_ERR, __VA_ARGS__) #define pl_warn(log, ...) pl_msg(log, PL_LOG_WARN, __VA_ARGS__) #define pl_info(log, ...) pl_msg(log, PL_LOG_INFO, __VA_ARGS__) #define pl_debug(log, ...) pl_msg(log, PL_LOG_DEBUG, __VA_ARGS__) #define pl_trace(log, ...) pl_msg(log, PL_LOG_TRACE, __VA_ARGS__) #define PL_MSG(obj, lev, ...) 
pl_msg((obj)->log, lev, __VA_ARGS__) #define PL_FATAL(obj, ...) PL_MSG(obj, PL_LOG_FATAL, __VA_ARGS__) #define PL_ERR(obj, ...) PL_MSG(obj, PL_LOG_ERR, __VA_ARGS__) #define PL_WARN(obj, ...) PL_MSG(obj, PL_LOG_WARN, __VA_ARGS__) #define PL_INFO(obj, ...) PL_MSG(obj, PL_LOG_INFO, __VA_ARGS__) #define PL_DEBUG(obj, ...) PL_MSG(obj, PL_LOG_DEBUG, __VA_ARGS__) #define PL_TRACE(obj, ...) PL_MSG(obj, PL_LOG_TRACE, __VA_ARGS__) // Log something with line numbers included void pl_msg_source(pl_log log, enum pl_log_level lev, const char *src); // Temporarily cap the log level to a certain verbosity. This is intended for // things like probing formats, attempting to create buffers that may fail, and // other types of operations in which we want to suppress errors. Call with // PL_LOG_NONE to disable this cap. // // Warning: This is generally not thread-safe, and only provided as a temporary // hack until a better solution can be thought of. void pl_log_level_cap(pl_log log, enum pl_log_level cap); // CPU execution time reporting helper static inline void pl_log_cpu_time(pl_log log, time_t start, time_t stop, const char *operation) { double ms = (stop - start) * 1e3 / CLOCKS_PER_SEC; enum pl_log_level lev = PL_LOG_DEBUG; if (ms > 10) lev = PL_LOG_INFO; if (ms > 1000) lev = PL_LOG_WARN; pl_msg(log, lev, "Spent %.3f ms %s%s", ms, operation, ms > 100 ? " (slow!)" : ""); } // Log stack trace void pl_log_stack_trace(pl_log log, enum pl_log_level lev); libplacebo-v4.192.1/src/meson.build000066400000000000000000000333421417677245700171340ustar00rootroot00000000000000# Configuration data conf_public = configuration_data() conf_internal = configuration_data() conf_public.set('majorver', majorver) conf_public.set('apiver', apiver) conf_internal.set('BUILD_API_VER', apiver) conf_internal.set('BUILD_FIX_VER', fixver) # Dependencies prog_python = import('python').find_installation() libm = cc.find_library('m', required: false) if host_machine.system() == 'windows' threads = declare_dependency() else threads = dependency('threads') conf_internal.set('PL_HAVE_PTHREAD', threads.found()) conf_internal.set('PTHREAD_HAS_SETCLOCK', cc.has_header_symbol( 'pthread.h', 'pthread_condattr_setclock', dependencies: threads, args: c_opts, )) endif build_deps = [ libm, threads ] test_deps = [] vulkan = dependency('vulkan', required: get_option('vulkan')) opengl = dependency('epoxy', version: '>=1.4.0', required: get_option('opengl')) shaderc = dependency('shaderc', version: '>=2019.1', required: get_option('shaderc')) cross = dependency('spirv-cross-c-shared', version: '>=0.29.0', required: get_option('d3d11')) if opengl.found() has_egl = opengl.get_variable(pkgconfig: 'epoxy_has_egl', default_value: '0') conf_internal.set('EPOXY_HAS_EGL', has_egl.to_int() == 1) endif vulkan_headers = vulkan if vulkan.found() and not get_option('vulkan-link') vulkan_headers = vulkan.partial_dependency(includes: true, compile_args: true) endif if not vulkan_headers.found() and not get_option('vulkan').disabled() # Probe for as a fallback for the vulkan headers if cc.check_header('vulkan/vulkan.h') vulkan_headers = declare_dependency() endif endif d3d11 = disabler() d3d11_inc = ['d3d11_4.h', 'dxgi1_6.h'] d3d11_deps = [ cross, cc.find_library('version', required: get_option('d3d11')), ] d3d11_found = true foreach h : d3d11_inc d3d11_found = d3d11_found and cc.check_header(h, required: get_option('d3d11')) endforeach foreach d : d3d11_deps d3d11_found = d3d11_found and d.found() endforeach if d3d11_found d3d11 = declare_dependency(dependencies: 
d3d11_deps) add_project_arguments(['-DCOBJMACROS', '-DINITGUID'], language: 'c') endif unwind = dependency('libunwind', required: get_option('unwind')) has_execinfo = cc.has_header('execinfo.h') conf_internal.set('PL_HAVE_UNWIND', unwind.found()) conf_internal.set('PL_HAVE_EXECINFO', has_execinfo) if unwind.found() build_deps += [unwind, cc.find_library('dl', required : false)] elif has_execinfo build_deps += cc.find_library('execinfo', required: false) endif # work-arounds for glslang braindeath glslang_combined = disabler() glslang_min_ver = '>=0.0.2763' glslang_req = get_option('glslang') if glslang_req.auto() and shaderc.found() # we only need one or the other, and shaderc is preferred message('Skipping `glslang` because `shaderc` is available') glslang_req = false glslang_found = false else glslang_deps = [ cxx.find_library('glslang', required: glslang_req), cxx.find_library('MachineIndependent', required: false), cxx.find_library('OSDependent', required: glslang_req), cxx.find_library('HLSL', required: glslang_req), cxx.find_library('OGLCompiler', required: glslang_req), cxx.find_library('GenericCodeGen', required: false), cxx.find_library('SPVRemapper', required: glslang_req), cxx.find_library('SPIRV', required: glslang_req), cxx.find_library('SPIRV-Tools-opt', required: false), cxx.find_library('SPIRV-Tools', required: false), ] glslang_found = glslang_deps[0].found() endif if glslang_found glslang_header_old = 'glslang/Include/revision.h' glslang_header_new = 'glslang/build_info.h' if cc.has_header(glslang_header_new) glslang_ver_major = cxx.get_define('GLSLANG_VERSION_MAJOR', prefix: '#include <' + glslang_header_new + '>' ).to_int() glslang_ver_minor = cxx.get_define('GLSLANG_VERSION_MINOR', prefix: '#include <' + glslang_header_new + '>' ).to_int() glslang_ver_patch = cxx.get_define('GLSLANG_VERSION_PATCH', prefix: '#include <' + glslang_header_new + '>' ).to_int() elif cc.has_header(glslang_header_old) # This is technically incorrect, but since we don't care about major # versions for this version range, it's an acceptable substitute glslang_ver_major = 0 glslang_ver_minor = 0 glslang_ver_patch = cxx.get_define('GLSLANG_PATCH_LEVEL', prefix: '#include <' + glslang_header_old+ '>' ).to_int() else error('No glslang version header found?') endif glslang_ver = '@0@.@1@.@2@'.format( glslang_ver_major, glslang_ver_minor, glslang_ver_patch, ) if glslang_ver.version_compare(glslang_min_ver) # glslang must be linked against pthreads on platforms where pthreads is # available. Because of their horrible architecture, gcc can't do it # automatically, and for some reason dependency('threads') (which uses # -pthread) doesn't work. We actually need -lpthreads for the glslang # object files to link, for whatever weird reason. pthread = cxx.find_library('pthread', required: false) glslang_all_deps = glslang_deps + [pthread] glslang_combined = declare_dependency(dependencies: glslang_all_deps) # Work around a glslang include path bug w.r.t stuff previously namespaced # under /usr/include/SPIRV now being moved to /usr/include/glslang/SPIRV. extra_glslang_inc = [ '/usr/include/glslang', get_option('prefix') / get_option('includedir') / 'glslang', ] foreach i : extra_glslang_inc add_project_arguments('-I' + i, language: 'cpp') endforeach conf_internal.set('GLSLANG_VERSION_MAJOR', glslang_ver_major) conf_internal.set('GLSLANG_VERSION_MINOR', glslang_ver_minor) conf_internal.set('GLSLANG_VERSION_PATCH', glslang_ver_patch) else error('glslang version @0@ too old! 
Must be at least @1@' .format(glslang_ver, glslang_min_ver)) endif endif # Work around missing atomics on some (obscure) platforms atomic_test = ''' #include #include int main(void) { _Atomic uint32_t x32; atomic_init(&x32, 0); }''' if not cc.links(atomic_test) build_deps += cc.find_library('atomic') endif # Source files headers = [ 'colorspace.h', 'common.h', 'context.h', 'dispatch.h', 'dither.h', 'dummy.h', 'filters.h', 'gpu.h', 'log.h', 'renderer.h', 'shaders/colorspace.h', 'shaders/custom.h', 'shaders/film_grain.h', 'shaders/lut.h', 'shaders/sampling.h', 'shaders.h', 'swapchain.h', 'tone_mapping.h', 'utils/dav1d.h', 'utils/dav1d_internal.h', 'utils/frame_queue.h', 'utils/libav.h', 'utils/libav_internal.h', 'utils/upload.h', ] sources = [ 'colorspace.c', 'common.c', 'dither.c', 'dispatch.c', 'dummy.c', 'filters.c', 'format.c', 'glsl/spirv.c', 'glsl/utils.c', 'gpu.c', 'log.c', 'pl_alloc.c', 'pl_string.c', 'renderer.c', 'siphash.c', 'shaders.c', 'shaders/colorspace.c', 'shaders/custom.c', 'shaders/film_grain.c', 'shaders/film_grain_av1.c', 'shaders/film_grain_h274.c', 'shaders/lut.c', 'shaders/sampling.c', 'swapchain.c', 'tone_mapping.c', 'utils/frame_queue.c', 'utils/upload.c', ] tests = [ 'colorspace.c', 'common.c', 'dither.c', 'dummy.c', 'lut.c', 'filters.c', 'string.c', 'tone_mapping.c', 'utils.c', ] fuzzers = [ 'lut.c', 'shaders.c', 'user_shaders.c', ] # Optional dependencies / components components = [ { 'name': 'lcms', 'deps': dependency('lcms2', version: '>=2.6', required: get_option('lcms')), 'srcs': 'shaders/icc.c', 'headers': 'shaders/icc.h', }, { 'name': 'glslang', 'deps': glslang_combined, 'srcs': [ 'glsl/glslang.cc', 'glsl/spirv_glslang.c', ], }, { 'name': 'shaderc', 'deps': shaderc, 'srcs': 'glsl/spirv_shaderc.c', }, { 'name': 'vulkan', 'deps': vulkan_headers, 'srcs': [ 'vulkan/command.c', 'vulkan/context.c', 'vulkan/formats.c', 'vulkan/gpu.c', 'vulkan/gpu_buf.c', 'vulkan/gpu_tex.c', 'vulkan/gpu_pass.c', 'vulkan/malloc.c', 'vulkan/swapchain.c', 'vulkan/utils.c', ], 'headers': 'vulkan.h', }, { 'name': 'opengl', 'deps': opengl, 'srcs': [ 'opengl/context.c', 'opengl/formats.c', 'opengl/gpu.c', 'opengl/gpu_tex.c', 'opengl/gpu_pass.c', 'opengl/swapchain.c', 'opengl/utils.c', ], 'headers': 'opengl.h', 'test': 'opengl_surfaceless.c', }, { 'name': 'd3d11', 'deps': d3d11, 'srcs': [ 'd3d11/context.c', 'd3d11/formats.c', 'd3d11/gpu.c', 'd3d11/gpu_buf.c', 'd3d11/gpu_tex.c', 'd3d11/gpu_pass.c', 'd3d11/swapchain.c', 'd3d11/utils.c', ], 'headers': 'd3d11.h', 'test': 'd3d11.c', } ] defs = '' pc_vars = [] comps = configuration_data() foreach c : components name = c['name'] deps = c['deps'] pretty = name.underscorify().to_upper() if deps.found() defs += '#define PL_HAVE_@0@ 1\n'.format(pretty) pc_vars += 'pl_has_@0@=1'.format(pretty.to_lower()) comps.set(name, 1) build_deps += deps sources += c.get('srcs', []) headers += c.get('headers', []) tests += c.get('test', []) else defs += '#undef PL_HAVE_@0@\n'.format(pretty) pc_vars += 'pl_has_@0@=0'.format(pretty.to_lower()) endif endforeach # Extra checks/steps required for vulkan in particular if comps.has('vulkan') registry_xml = get_option('vulkan-registry') sources += custom_target('vulkan boilerplate', input: 'vulkan/utils_gen.py', output: 'utils_gen.c', command: [prog_python, '@INPUT@', registry_xml, '@OUTPUT@'] ) if vulkan.found() and get_option('vulkan-link') defs += '#define PL_HAVE_VK_PROC_ADDR 1' pc_vars += 'pl_has_vk_proc_addr=1' else defs += '#undef PL_HAVE_VK_PROC_ADDR' pc_vars += 'pl_has_vk_proc_addr=0' endif endif # Check to see 
if libplacebo built this way is sane if not (comps.has('vulkan') or comps.has('opengl') or comps.has('d3d11')) warning('Building without any graphics API. libplacebo built this way still ' + 'has some limited use (e.g. generating GLSL shaders), but most of ' + 'its functionality will be missing or impaired!') endif if comps.has('vulkan') and not (comps.has('shaderc') or comps.has('glslang')) error('Building with support for Vulkan requires either `shaderc` or ' + '`glslang` to be of any use, otherwise libplacebo would fail to ' + 'compile GLSL to SPIR-V (needed by the Vulkan API)!') endif # Build process conf_public.set('extra_defs', defs) subdir('./include/libplacebo') # generate config.h in the right location configure_file( output: 'config_internal.h', configuration: conf_internal ) sources += vcs_tag( command: ['git', 'describe'], fallback: version_pretty, replace_string: '@buildver@', input: 'version.h.in', output: 'version.h', ) inc = include_directories('./include') lib = library('placebo', sources, install: true, dependencies: build_deps, soversion: apiver, include_directories: inc, link_args: link_args, ) libplacebo = declare_dependency( link_with: lib, include_directories: inc, ) # Allows projects to build libplacebo by cloning into ./subprojects/libplacebo meson.override_dependency('libplacebo', libplacebo) # Install process foreach h : headers parts = h.split('/') path = proj_name foreach p : parts if p != parts[-1] path = path / p endif endforeach install_headers('include' / proj_name / h, subdir: path) endforeach pkg = import('pkgconfig') pkg.generate( name: proj_name, description: 'Reusable library for GPU-accelerated video/image rendering', libraries: lib, version: version, variables: pc_vars, ) # Tests tdep = [ declare_dependency( link_with: lib, dependencies: build_deps + test_deps, include_directories: inc, ) ] if get_option('tests') dav1d = dependency('dav1d', required: false) if dav1d.found() tdep += dav1d tests += 'dav1d.c' endif lavu = dependency('libavutil', version: '>=55.74.100', required: false) lavc = dependency('libavcodec', required: false) lavf = dependency('libavformat', required: false) libav_found = lavu.found() and lavc.found() and lavf.found() if libav_found tdep += [lavu, lavc, lavf] tests += 'libav.c' endif if vulkan.found() tdep += vulkan tests += 'vulkan.c' endif foreach t : tests e = executable('test.' + t, 'tests/' + t, objects: lib.extract_all_objects(recursive: false), c_args: [ '-Wno-unused-function' ], dependencies: tdep, ) test(t, e) endforeach # Ensure all headers compile foreach h : headers if (h.contains('internal') or h.contains('dav1d') and not dav1d.found() or h.contains('libav') and not libav_found) continue endif t = configure_file( input: 'tests/include_tmpl.c', output: 'include_@0@.c'.format(h.underscorify()), configuration: { 'header': h }, ) executable('test.include.' + h.underscorify(), t, dependencies: tdep, c_args: [ '-Wno-unused-function' ], ) endforeach endif if get_option('bench') if not vulkan.found() error('Compiling the benchmark suite requires vulkan support!') endif bench = executable('bench', 'tests/bench.c', dependencies: tdep) test('benchmark', bench, is_parallel: false, timeout: 600) endif if get_option('fuzz') foreach f : fuzzers executable('fuzz.' 
+ f, 'tests/fuzz/' + f, objects: lib.extract_all_objects(recursive: false), dependencies: tdep, ) endforeach endif libplacebo-v4.192.1/src/opengl/000077500000000000000000000000001417677245700162515ustar00rootroot00000000000000libplacebo-v4.192.1/src/opengl/common.h000066400000000000000000000021431417677245700177120ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "../common.h" #include "../log.h" #include "../gpu.h" #include #ifdef EPOXY_HAS_EGL #include #endif struct gl_cb { void (*callback)(void *priv); void *priv; GLsync sync; }; struct fbo_format { pl_fmt fmt; const struct gl_format *glfmt; }; // For locking/unlocking bool gl_make_current(pl_opengl gl); void gl_release_current(pl_opengl gl); libplacebo-v4.192.1/src/opengl/context.c000066400000000000000000000163441417677245700201110ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "utils.h" #include "gpu.h" #include "pl_thread.h" const struct pl_opengl_params pl_opengl_default_params = {0}; struct priv { struct pl_opengl_params params; pl_log log; bool is_debug; bool is_debug_egl; // For context locking pl_mutex lock; int count; }; static void GLAPIENTRY debug_cb(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *message, const void *userParam) { pl_log log = (void *) userParam; enum pl_log_level level = PL_LOG_ERR; switch (severity) { case GL_DEBUG_SEVERITY_NOTIFICATION:level = PL_LOG_DEBUG; break; case GL_DEBUG_SEVERITY_LOW: level = PL_LOG_INFO; break; case GL_DEBUG_SEVERITY_MEDIUM: level = PL_LOG_WARN; break; case GL_DEBUG_SEVERITY_HIGH: level = PL_LOG_ERR; break; } #ifndef MSAN pl_msg(log, level, "GL: %s", message); if (level <= PL_LOG_ERR) pl_log_stack_trace(log, level); #endif } #ifdef EPOXY_HAS_EGL static void debug_cb_egl(EGLenum error, const char *command, EGLint messageType, EGLLabelKHR threadLabel, EGLLabelKHR objectLabel, const char *message) { pl_log log = threadLabel; enum pl_log_level level = PL_LOG_ERR; switch (messageType) { case EGL_DEBUG_MSG_CRITICAL_KHR: level = PL_LOG_FATAL; break; case EGL_DEBUG_MSG_ERROR_KHR: level = PL_LOG_ERR; break; case EGL_DEBUG_MSG_WARN_KHR: level = PL_LOG_WARN; break; case EGL_DEBUG_MSG_INFO_KHR: level = PL_LOG_DEBUG; break; } #ifndef MSAN pl_msg(log, level, "EGL: %s: %s %s", command, egl_err_str(error), message); if (level <= PL_LOG_ERR) pl_log_stack_trace(log, level); #endif } #endif // EPOXY_HAS_EGL void pl_opengl_destroy(pl_opengl *ptr) { pl_opengl pl_gl = *ptr; if (!pl_gl) return; struct priv *p = PL_PRIV(pl_gl); if (!gl_make_current(pl_gl)) { PL_WARN(p, "Failed uninitializing OpenGL context, leaking resources!"); return; } if (p->is_debug) glDebugMessageCallback(NULL, NULL); #ifdef EPOXY_HAS_EGL if (p->is_debug_egl) eglDebugMessageControlKHR(NULL, NULL); #endif pl_gpu_destroy(pl_gl->gpu); gl_release_current(pl_gl); pl_mutex_destroy(&p->lock); pl_free_ptr((void **) ptr); } pl_opengl pl_opengl_create(pl_log log, const struct pl_opengl_params *params) { params = PL_DEF(params, &pl_opengl_default_params); struct pl_opengl *pl_gl = pl_zalloc_obj(NULL, pl_gl, struct priv); struct priv *p = PL_PRIV(pl_gl); p->params = *params; p->log = log; pl_mutex_init_type(&p->lock, PL_MUTEX_RECURSIVE); if (!gl_make_current(pl_gl)) { pl_free(pl_gl); return NULL; } int ver = epoxy_gl_version(); if (!ver) { PL_FATAL(p, "No OpenGL version detected - make sure an OpenGL context " "is bound to the current thread!"); goto error; } PL_INFO(p, "Detected OpenGL version strings:"); PL_INFO(p, " GL_VERSION: %s", (char *) glGetString(GL_VERSION)); PL_INFO(p, " GL_VENDOR: %s", (char *) glGetString(GL_VENDOR)); PL_INFO(p, " GL_RENDERER: %s", (char *) glGetString(GL_RENDERER)); if (pl_msg_test(log, PL_LOG_DEBUG)) { if (ver >= 30) { int num_exts = 0; glGetIntegerv(GL_NUM_EXTENSIONS, &num_exts); PL_DEBUG(p, " GL_EXTENSIONS:"); for (int i = 0; i < num_exts; i++) { const char *ext = (char *) glGetStringi(GL_EXTENSIONS, i); PL_DEBUG(p, " %s", ext); } } else { PL_DEBUG(p, " GL_EXTENSIONS: %s", (char *) glGetString(GL_EXTENSIONS)); } #ifdef EPOXY_HAS_EGL if (params->egl_display) { PL_DEBUG(p, " EGL_EXTENSIONS: %s", eglQueryString(params->egl_display, EGL_EXTENSIONS)); } #endif } if (!params->allow_software && gl_is_software()) { PL_FATAL(p, "OpenGL context is suspected to be a software rasterizer, " "but `allow_software` is false."); goto error; } if (params->debug) { if 
(epoxy_has_gl_extension("GL_KHR_debug")) { glDebugMessageCallback(debug_cb, log); glEnable(GL_DEBUG_OUTPUT); p->is_debug = true; } else { PL_WARN(p, "OpenGL debugging requested, but GL_KHR_debug is not " "available... ignoring!"); } #ifdef EPOXY_HAS_EGL if (params->egl_display && epoxy_has_egl_extension(params->egl_display, "EGL_KHR_debug")) { static const EGLAttrib attribs[] = { // Enable everything under the sun, because the `pl_ctx` log // level may change at runtime. EGL_DEBUG_MSG_CRITICAL_KHR, EGL_TRUE, EGL_DEBUG_MSG_ERROR_KHR, EGL_TRUE, EGL_DEBUG_MSG_WARN_KHR, EGL_TRUE, EGL_DEBUG_MSG_INFO_KHR, EGL_TRUE, EGL_NONE, }; eglDebugMessageControlKHR(debug_cb_egl, attribs); eglLabelObjectKHR(NULL, EGL_OBJECT_THREAD_KHR, NULL, (void *) log); p->is_debug_egl = true; } #endif // EPOXY_HAS_EGL } pl_gl->gpu = pl_gpu_create_gl(log, pl_gl, params); if (!pl_gl->gpu) goto error; // Restrict version if (params->max_glsl_version) { struct pl_glsl_version *glsl = (struct pl_glsl_version *) &pl_gl->gpu->glsl; glsl->version = PL_MIN(glsl->version, params->max_glsl_version); PL_INFO(p, "Restricting GLSL version to %d... new version is %d", params->max_glsl_version, glsl->version); } gl_release_current(pl_gl); return pl_gl; error: PL_FATAL(p, "Failed initializing opengl context!"); gl_release_current(pl_gl); pl_opengl_destroy((pl_opengl *) &pl_gl); return NULL; } bool gl_make_current(pl_opengl gl) { struct priv *p = PL_PRIV(gl); pl_mutex_lock(&p->lock); if (!p->count && p->params.make_current) { if (!p->params.make_current(p->params.priv)) { PL_ERR(p, "Failed making OpenGL context current on calling thread!"); pl_mutex_unlock(&p->lock); return false; } } p->count++; return true; } void gl_release_current(pl_opengl gl) { struct priv *p = PL_PRIV(gl); p->count--; if (!p->count && p->params.release_current) p->params.release_current(p->params.priv); pl_mutex_unlock(&p->lock); } libplacebo-v4.192.1/src/opengl/formats.c000066400000000000000000000417721417677245700201030ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" #include "common.h" #include "formats.h" #include "utils.h" #if defined(PL_HAVE_UNIX) && defined(EPOXY_HAS_EGL) static bool supported_fourcc(struct pl_gl *p, EGLint fourcc) { for (int i = 0; i < p->egl_formats.num; ++i) if (fourcc == p->egl_formats.elem[i]) return true; return false; } #endif #define FMT(_name, bits, ftype, _caps) \ (struct pl_fmt) { \ .name = _name, \ .type = PL_FMT_##ftype, \ .caps = (enum pl_fmt_caps) (_caps), \ .sample_order = {0, 1, 2, 3}, \ .component_depth = {bits, bits, bits, bits}, \ } // Convenience to make the names simpler enum { // Type aliases U8 = GL_UNSIGNED_BYTE, U16 = GL_UNSIGNED_SHORT, U32 = GL_UNSIGNED_INT, I8 = GL_BYTE, I16 = GL_SHORT, I32 = GL_INT, FLT = GL_FLOAT, // Component aliases R = GL_RED, RG = GL_RG, RGB = GL_RGB, RGBA = GL_RGBA, BGRA = GL_BGRA, RI = GL_RED_INTEGER, RGI = GL_RG_INTEGER, RGBI = GL_RGB_INTEGER, RGBAI = GL_RGBA_INTEGER, // Capability aliases S = PL_FMT_CAP_SAMPLEABLE, L = PL_FMT_CAP_LINEAR, F = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE, // FBO support V = PL_FMT_CAP_VERTEX, }; // Basic 8-bit formats const struct gl_format formats_norm8[] = { {GL_R8, R, U8, FMT("r8", 8, UNORM, S|L|F|V)}, {GL_RG8, RG, U8, FMT("rg8", 8, UNORM, S|L|F|V)}, {GL_RGB8, RGB, U8, FMT("rgb8", 8, UNORM, S|L|F|V)}, {GL_RGBA8, RGBA, U8, FMT("rgba8", 8, UNORM, S|L|F|V)}, }; // BGRA 8-bit const struct gl_format formats_bgra8[] = { {GL_RGBA8, BGRA, U8, { .name = "bgra8", .type = PL_FMT_UNORM, .caps = S|L|F|V, .sample_order = {2, 1, 0, 3}, .component_depth = {8, 8, 8, 8}, }}, }; // Basic 16-bit formats, excluding rgb16 (special cased below) const struct gl_format formats_norm16[] = { {GL_R16, R, U16, FMT("r16", 16, UNORM, S|L|F|V)}, {GL_RG16, RG, U16, FMT("rg16", 16, UNORM, S|L|F|V)}, {GL_RGBA16, RGBA, U16, FMT("rgba16", 16, UNORM, S|L|F|V)}, }; // Renderable version of rgb16 const struct gl_format formats_rgb16_fbo[] = { {GL_RGB16, RGB, U16, FMT("rgb16", 16, UNORM, S|L|F|V)}, }; // Non-renderable version of rgb16 const struct gl_format formats_rgb16_fallback[] = { {GL_RGB16, RGB, U16, FMT("rgb16", 16, UNORM, S|L|V)}, }; // Floating point texture formats const struct gl_format formats_float[] = { {GL_R16F, R, FLT, FMT("r16f", 16, FLOAT, S|L|F)}, {GL_RG16F, RG, FLT, FMT("rg16f", 16, FLOAT, S|L|F)}, {GL_RGB16F, RGB, FLT, FMT("rgb16f", 16, FLOAT, S|L|F)}, {GL_RGBA16F, RGBA, FLT, FMT("rgba16f", 16, FLOAT, S|L|F)}, {GL_R32F, R, FLT, FMT("r32f", 32, FLOAT, S|L|F|V)}, {GL_RG32F, RG, FLT, FMT("rg32f", 32, FLOAT, S|L|F|V)}, {GL_RGB32F, RGB, FLT, FMT("rgb32f", 32, FLOAT, S|L|F|V)}, {GL_RGBA32F, RGBA, FLT, FMT("rgba32f", 32, FLOAT, S|L|F|V)}, }; // Renderable 16-bit float formats (excluding rgb16f) const struct gl_format formats_float16_fbo[] = { {GL_R16F, R, FLT, FMT("r16f", 16, FLOAT, S|L|F)}, {GL_RG16F, RG, FLT, FMT("rg16f", 16, FLOAT, S|L|F)}, {GL_RGB16F, RGB, FLT, FMT("rgb16f", 16, FLOAT, S|L)}, {GL_RGBA16F, RGBA, FLT, FMT("rgba16f", 16, FLOAT, S|L|F)}, }; // Non-renderable 16-bit float formats const struct gl_format formats_float16_fallback[] = { {GL_R16F, R, FLT, FMT("r16f", 16, FLOAT, S|L)}, {GL_RG16F, RG, FLT, FMT("rg16f", 16, FLOAT, S|L)}, {GL_RGB16F, RGB, FLT, FMT("rgb16f", 16, FLOAT, S|L)}, {GL_RGBA16F, RGBA, FLT, FMT("rgba16f", 16, FLOAT, S|L)}, }; // (Unsigned) integer formats const struct gl_format formats_uint[] = { {GL_R8UI, RI, U8, FMT("r8u", 8, UINT, S|F|V)}, {GL_RG8UI, RGI, U8, FMT("rg8u", 8, UINT, S|F|V)}, {GL_RGB8UI, RGBI, U8, FMT("rgb8u", 8, UINT, S|V)}, {GL_RGBA8UI, RGBAI, U8, FMT("rgba8u", 8, UINT, S|F|V)}, {GL_R16UI, RI, U16, 
FMT("r16u", 16, UINT, S|F|V)}, {GL_RG16UI, RGI, U16, FMT("rg16u", 16, UINT, S|F|V)}, {GL_RGB16UI, RGBI, U16, FMT("rgb16u", 16, UINT, S|V)}, {GL_RGBA16UI, RGBAI, U16, FMT("rgba16u", 16, UINT, S|F|V)}, }; /* TODO {GL_R32UI, RI, U32, FMT("r32u", 32, UINT)}, {GL_RG32UI, RGI, U32, FMT("rg32u", 32, UINT)}, {GL_RGB32UI, RGBI, U32, FMT("rgb32u", 32, UINT)}, {GL_RGBA32UI, RGBAI, U32, FMT("rgba32u", 32, UINT)}, {GL_R8_SNORM, R, I8, FMT("r8s", 8, SNORM)}, {GL_RG8_SNORM, RG, I8, FMT("rg8s", 8, SNORM)}, {GL_RGB8_SNORM, RGB, I8, FMT("rgb8s", 8, SNORM)}, {GL_RGBA8_SNORM, RGBA, I8, FMT("rgba8s", 8, SNORM)}, {GL_R16_SNORM, R, I16, FMT("r16s", 16, SNORM)}, {GL_RG16_SNORM, RG, I16, FMT("rg16s", 16, SNORM)}, {GL_RGB16_SNORM, RGB, I16, FMT("rgb16s", 16, SNORM)}, {GL_RGBA16_SNORM, RGBA, I16, FMT("rgba16s", 16, SNORM)}, {GL_R8I, RI, I8, FMT("r8i", 8, SINT)}, {GL_RG8I, RGI, I8, FMT("rg8i", 8, SINT)}, {GL_RGB8I, RGBI, I8, FMT("rgb8i", 8, SINT)}, {GL_RGBA8I, RGBAI, I8, FMT("rgba8i", 8, SINT)}, {GL_R16I, RI, I16, FMT("r16i", 16, SINT)}, {GL_RG16I, RGI, I16, FMT("rg16i", 16, SINT)}, {GL_RGB16I, RGBI, I16, FMT("rgb16i", 16, SINT)}, {GL_RGBA16I, RGBAI, I16, FMT("rgba16i", 16, SINT)}, {GL_R32I, RI, I32, FMT("r32i", 32, SINT)}, {GL_RG32I, RGI, I32, FMT("rg32i", 32, SINT)}, {GL_RGB32I, RGBI, I32, FMT("rgb32i", 32, SINT)}, {GL_RGBA32I, RGBAI, I32, FMT("rgba32i", 32, SINT)}, */ // GL2 legacy formats const struct gl_format formats_legacy_gl2[] = { {GL_RGB8, RGB, U8, FMT("rgb8", 8, UNORM, S|L|V)}, {GL_RGBA8, RGBA, U8, FMT("rgba8", 8, UNORM, S|L|V)}, {GL_RGB16, RGB, U16, FMT("rgb16", 16, UNORM, S|L|V)}, {GL_RGBA16, RGBA, U16, FMT("rgba16", 16, UNORM, S|L|V)}, }; // GLES2 legacy formats const struct gl_format formats_legacy_gles2[] = { {GL_RGB, RGB, U8, FMT("rgb", 8, UNORM, S|L)}, {GL_RGBA, RGBA, U8, FMT("rgba", 8, UNORM, S|L)}, }; // GLES BGRA const struct gl_format formats_bgra_gles[] = { {GL_BGRA, BGRA, U8, { .name = "bgra8", .type = PL_FMT_UNORM, .caps = S|L|F|V, .sample_order = {2, 1, 0, 3}, .component_depth = {8, 8, 8, 8}, }}, }; // Fallback for vertex-only formats, as a last resort const struct gl_format formats_basic_vertex[] = { {GL_R32F, R, FLT, FMT("r32f", 32, FLOAT, V)}, {GL_RG32F, RG, FLT, FMT("rg32f", 32, FLOAT, V)}, {GL_RGB32F, RGB, FLT, FMT("rgb32f", 32, FLOAT, V)}, {GL_RGBA32F, RGBA, FLT, FMT("rgba32f", 32, FLOAT, V)}, }; static void add_format(pl_gpu pgpu, const struct gl_format *gl_fmt) { struct pl_gpu *gpu = (struct pl_gpu *) pgpu; struct pl_gl *p = PL_PRIV(gpu); struct pl_fmt *fmt = pl_alloc_obj(gpu, fmt, gl_fmt); const struct gl_format **fmtp = PL_PRIV(fmt); *fmt = gl_fmt->tmpl; *fmtp = gl_fmt; // Calculate the host size and number of components switch (gl_fmt->fmt) { case GL_RED: case GL_RED_INTEGER: fmt->num_components = 1; break; case GL_RG: case GL_RG_INTEGER: fmt->num_components = 2; break; case GL_RGB: case GL_RGB_INTEGER: fmt->num_components = 3; break; case GL_RGBA: case GL_RGBA_INTEGER: case GL_BGRA: fmt->num_components = 4; break; default: pl_unreachable(); } int size; switch (gl_fmt->type) { case GL_BYTE: case GL_UNSIGNED_BYTE: size = 1; break; case GL_SHORT: case GL_UNSIGNED_SHORT: size = 2; break; case GL_INT: case GL_UNSIGNED_INT: case GL_FLOAT: size = 4; break; default: pl_unreachable(); } // Host visible representation fmt->texel_size = fmt->num_components * size; fmt->texel_align = 1; for (int i = 0; i < fmt->num_components; i++) fmt->host_bits[i] = size * 8; // Compute internal size by summing up the depth int ibits = 0; for (int i = 0; i < fmt->num_components; i++) ibits += 
fmt->component_depth[i]; fmt->internal_size = (ibits + 7) / 8; // We're not the ones actually emulating these texture format - the // driver is - but we might as well set the hint. fmt->emulated = fmt->texel_size != fmt->internal_size; // 3-component formats are almost surely also emulated if (fmt->num_components == 3) fmt->emulated = true; // Older OpenGL most likely emulates 32-bit float formats as well if (p->gl_ver < 30 && fmt->component_depth[0] >= 32) fmt->emulated = true; // For sanity, clear the superfluous fields for (int i = fmt->num_components; i < 4; i++) { fmt->component_depth[i] = 0; fmt->sample_order[i] = 0; fmt->host_bits[i] = 0; } fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); fmt->glsl_format = pl_fmt_glsl_format(fmt, fmt->num_components); fmt->fourcc = pl_fmt_fourcc(fmt); pl_assert(fmt->glsl_type); #if defined(PL_HAVE_UNIX) && defined(EPOXY_HAS_EGL) if (p->has_modifiers && fmt->fourcc && supported_fourcc(p, fmt->fourcc)) { int num_mods = 0; bool ok = eglQueryDmaBufModifiersEXT(p->egl_dpy, fmt->fourcc, 0, NULL, NULL, &num_mods); if (ok && num_mods) { // On my system eglQueryDmaBufModifiersEXT seems to never return // MOD_INVALID even though eglExportDMABUFImageQueryMESA happily // returns such modifiers. Since we handle INVALID by not // requiring modifiers at all, always add this value to the // list of supported modifiers. May result in duplicates, but // whatever. uint64_t *mods = pl_calloc(fmt, num_mods + 1, sizeof(uint64_t)); mods[0] = DRM_FORMAT_MOD_INVALID; ok = eglQueryDmaBufModifiersEXT(p->egl_dpy, fmt->fourcc, num_mods, &mods[1], NULL, &num_mods); if (ok) { fmt->modifiers = mods; fmt->num_modifiers = num_mods + 1; } else { pl_free(mods); } } eglGetError(); // ignore probing errors } if (!fmt->num_modifiers) { // Hacky fallback for older drivers that don't support properly // querying modifiers static const uint64_t static_mods[] = { DRM_FORMAT_MOD_INVALID, DRM_FORMAT_MOD_LINEAR, }; fmt->num_modifiers = PL_ARRAY_SIZE(static_mods); fmt->modifiers = static_mods; } #endif // Gathering requires checking the format type (and extension presence) if (fmt->caps & PL_FMT_CAP_SAMPLEABLE) fmt->gatherable = p->gather_comps >= fmt->num_components; // Mask renderable/blittable if no FBOs available if (!p->has_fbos) fmt->caps &= ~(PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE); // Reading from textures on GLES requires FBO support for this fmt if (p->has_readback && (p->gl_ver || (fmt->caps & PL_FMT_CAP_RENDERABLE))) fmt->caps |= PL_FMT_CAP_HOST_READABLE; if (gpu->glsl.compute && fmt->glsl_format && p->has_storage) fmt->caps |= PL_FMT_CAP_STORABLE | PL_FMT_CAP_READWRITE; // Only float-type formats are considered blendable in OpenGL switch (fmt->type) { case PL_FMT_UNKNOWN: case PL_FMT_UINT: case PL_FMT_SINT: break; case PL_FMT_FLOAT: case PL_FMT_UNORM: case PL_FMT_SNORM: if (fmt->caps & PL_FMT_CAP_RENDERABLE) fmt->caps |= PL_FMT_CAP_BLENDABLE; break; case PL_FMT_TYPE_COUNT: pl_unreachable(); } // TODO: Texel buffers PL_ARRAY_APPEND_RAW(gpu, gpu->formats, gpu->num_formats, fmt); } #define DO_FORMATS(formats) \ do { \ for (int i = 0; i < PL_ARRAY_SIZE(formats); i++) \ add_format(gpu, &formats[i]); \ } while (0) bool gl_setup_formats(struct pl_gpu *gpu) { struct pl_gl *p = PL_PRIV(gpu); #if defined(PL_HAVE_UNIX) && defined(EPOXY_HAS_EGL) if (p->has_modifiers) { EGLint num_formats = 0; bool ok = eglQueryDmaBufFormatsEXT(p->egl_dpy, 0, NULL, &num_formats); if (ok && num_formats) { p->egl_formats.elem = pl_calloc(gpu, num_formats, sizeof(EGLint)); p->egl_formats.num 
= num_formats; ok = eglQueryDmaBufFormatsEXT(p->egl_dpy, num_formats, p->egl_formats.elem, &num_formats); pl_assert(ok); PL_DEBUG(gpu, "EGL formats supported:"); for (int i = 0; i < num_formats; ++i) { PL_DEBUG(gpu, " 0x%08x(%.4s)", p->egl_formats.elem[i], PRINT_FOURCC(p->egl_formats.elem[i])); } } } #endif if (p->gl_ver >= 30) { // Desktop GL3+ has everything DO_FORMATS(formats_norm8); DO_FORMATS(formats_bgra8); DO_FORMATS(formats_norm16); DO_FORMATS(formats_rgb16_fbo); DO_FORMATS(formats_float); DO_FORMATS(formats_uint); goto done; } if (p->gl_ver >= 21) { // If we have a reasonable set of extensions, we can enable most // things. Otherwise, pick simple fallback formats if (epoxy_has_gl_extension("GL_ARB_texture_float") && epoxy_has_gl_extension("GL_ARB_texture_rg") && epoxy_has_gl_extension("GL_ARB_framebuffer_object")) { DO_FORMATS(formats_norm8); DO_FORMATS(formats_bgra8); DO_FORMATS(formats_norm16); DO_FORMATS(formats_rgb16_fbo); DO_FORMATS(formats_float); } else { // Fallback for GL2 DO_FORMATS(formats_legacy_gl2); DO_FORMATS(formats_basic_vertex); } goto done; } if (p->gles_ver >= 30) { // GLES 3.0 has some basic formats, with framebuffers for float16 // depending on GL_EXT_color_buffer_(half_)float support DO_FORMATS(formats_norm8); if (epoxy_has_gl_extension("GL_EXT_texture_norm16")) { DO_FORMATS(formats_norm16); DO_FORMATS(formats_rgb16_fallback); } if (epoxy_has_gl_extension("GL_EXT_texture_format_BGRA8888")) DO_FORMATS(formats_bgra_gles); if (epoxy_has_gl_extension("GL_EXT_texture_integer")) DO_FORMATS(formats_uint); DO_FORMATS(formats_basic_vertex); if (p->gles_ver >= 32 || epoxy_has_gl_extension("GL_EXT_color_buffer_half_float") || epoxy_has_gl_extension("GL_EXT_color_buffer_float")) { DO_FORMATS(formats_float16_fbo); } else { DO_FORMATS(formats_float16_fallback); } goto done; } if (p->gles_ver >= 20) { // GLES 2.0 only has some legacy fallback formats, with support for // float16 depending on GL_EXT_texture_norm16 being present DO_FORMATS(formats_legacy_gles2); DO_FORMATS(formats_basic_vertex); if (epoxy_has_gl_extension("GL_EXT_texture_rg")) { DO_FORMATS(formats_norm8); } if (epoxy_has_gl_extension("GL_EXT_texture_format_BGRA8888")) { DO_FORMATS(formats_bgra_gles); } goto done; } // Last resort fallback. Probably not very useful DO_FORMATS(formats_basic_vertex); goto done; done: return gl_check_err(gpu, "gl_setup_formats"); } libplacebo-v4.192.1/src/opengl/formats.h000066400000000000000000000022301417677245700200720ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" struct gl_format { GLint ifmt; // sized internal format (e.g. GL_RGBA16F) GLenum fmt; // base internal format (e.g. GL_RGBA) GLenum type; // host-visible type (e.g. GL_FLOAT) struct pl_fmt tmpl; // pl_fmt template }; typedef void (gl_format_cb)(pl_gpu gpu, const struct gl_format *glfmt); // Add all supported formats to the `pl_gpu` format list. 
bool gl_setup_formats(struct pl_gpu *gpu); libplacebo-v4.192.1/src/opengl/gpu.c000066400000000000000000000464431417677245700172230ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "common.h" #include "formats.h" #include "utils.h" #ifdef PL_HAVE_UNIX #include #endif #ifdef PL_HAVE_WIN32 #include #include #endif static const struct pl_gpu_fns pl_fns_gl; static void gl_gpu_destroy(pl_gpu gpu) { struct pl_gl *p = PL_PRIV(gpu); pl_gpu_finish(gpu); while (p->callbacks.num > 0) gl_poll_callbacks(gpu); pl_free((void *) gpu); } pl_opengl pl_opengl_get(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->destroy == gl_gpu_destroy) { struct pl_gl *p = (struct pl_gl *) impl; return p->gl; } return NULL; } #ifdef EPOXY_HAS_EGL static pl_handle_caps tex_handle_caps(pl_gpu gpu, bool import) { pl_handle_caps caps = 0; struct pl_gl *p = PL_PRIV(gpu); if (!p->egl_dpy) return 0; if (import) { if (epoxy_has_egl_extension(p->egl_dpy, "EGL_EXT_image_dma_buf_import")) caps |= PL_HANDLE_DMA_BUF; } else if (!import && p->egl_ctx) { if (epoxy_has_egl_extension(p->egl_dpy, "EGL_MESA_image_dma_buf_export")) caps |= PL_HANDLE_DMA_BUF; } return caps; } #endif // EPOXY_HAS_EGL static inline size_t get_page_size(void) { #ifdef PL_HAVE_UNIX return sysconf(_SC_PAGESIZE); #endif #ifdef PL_HAVE_WIN32 SYSTEM_INFO sysInfo; GetSystemInfo(&sysInfo); return sysInfo.dwAllocationGranularity; #endif pl_assert(!"Unsupported platform!"); } #define get(pname, field) \ do { \ GLint tmp = 0; \ glGetIntegerv((pname), &tmp); \ *(field) = tmp; \ } while (0) #define geti(pname, i, field) \ do { \ GLint tmp = 0; \ glGetIntegeri_v((pname), i, &tmp); \ *(field) = tmp; \ } while (0) pl_gpu pl_gpu_create_gl(pl_log log, pl_opengl gl, const struct pl_opengl_params *params) { struct pl_gpu *gpu = pl_zalloc_obj(NULL, gpu, struct pl_gl); gpu->log = log; gpu->ctx = gpu->log; struct pl_gl *p = PL_PRIV(gpu); p->impl = pl_fns_gl; p->gl = gl; struct pl_glsl_version *glsl = &gpu->glsl; int ver = epoxy_gl_version(); glsl->gles = !epoxy_is_desktop_gl(); p->gl_ver = glsl->gles ? 0 : ver; p->gles_ver = glsl->gles ? ver : 0; // If possible, query the GLSL version from the implementation const char *glslver = (char *) glGetString(GL_SHADING_LANGUAGE_VERSION); if (glslver) { PL_INFO(gpu, " GL_SHADING_LANGUAGE_VERSION: %s", glslver); int major = 0, minor = 0; if (sscanf(glslver, "%d.%d", &major, &minor) == 2) glsl->version = major * 100 + minor; } if (!glsl->version) { // Otherwise, use the fixed magic versions 100 and 300 for GLES. 
if (p->gles_ver >= 30) { glsl->version = 300; } else if (p->gles_ver >= 20) { glsl->version = 100; } else { goto error; } } if (gl_test_ext(gpu, "GL_ARB_compute_shader", 43, 0)) { glsl->compute = true; get(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &glsl->max_shmem_size); get(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &glsl->max_group_threads); for (int i = 0; i < 3; i++) geti(GL_MAX_COMPUTE_WORK_GROUP_SIZE, i, &glsl->max_group_size[i]); } if (gl_test_ext(gpu, "GL_ARB_texture_gather", 40, 0)) { get(GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB, &p->gather_comps); get(GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET_ARB, &glsl->min_gather_offset); get(GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET_ARB, &glsl->max_gather_offset); } // Query all device limits struct pl_gpu_limits *limits = &gpu->limits; limits->thread_safe = params->make_current; limits->callbacks = gl_test_ext(gpu, "GL_ARB_sync", 32, 30); if (gl_test_ext(gpu, "GL_ARB_pixel_buffer_object", 31, 0)) limits->max_buf_size = SIZE_MAX; // no restriction imposed by GL if (gl_test_ext(gpu, "GL_ARB_uniform_buffer_object", 31, 0)) get(GL_MAX_UNIFORM_BLOCK_SIZE, &limits->max_ubo_size); if (gl_test_ext(gpu, "GL_ARB_shader_storage_buffer_object", 43, 0)) get(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &limits->max_ssbo_size); limits->max_vbo_size = limits->max_buf_size; // No additional restrictions if (gl_test_ext(gpu, "GL_ARB_buffer_storage", 44, 0)) limits->max_mapped_size = limits->max_buf_size; limits->align_vertex_stride = 1; get(GL_MAX_TEXTURE_SIZE, &limits->max_tex_2d_dim); if (gl_test_ext(gpu, "GL_EXT_texture3D", 21, 30)) get(GL_MAX_3D_TEXTURE_SIZE, &limits->max_tex_3d_dim); // There's no equivalent limit for 1D textures for whatever reason, so // just set it to the same as the 2D limit if (p->gl_ver >= 21) limits->max_tex_1d_dim = limits->max_tex_2d_dim; limits->buf_transfer = true; if (p->gl_ver || p->gles_ver >= 30) { get(GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, &limits->max_variable_comps); } else { // fallback for GLES 2.0, which doesn't have max_comps get(GL_MAX_FRAGMENT_UNIFORM_VECTORS, &limits->max_variable_comps); limits->max_variable_comps *= 4; } if (glsl->compute) { for (int i = 0; i < 3; i++) geti(GL_MAX_COMPUTE_WORK_GROUP_COUNT, i, &limits->max_dispatch[i]); } // Query import/export support #ifdef EPOXY_HAS_EGL p->egl_dpy = params->egl_display; p->egl_ctx = params->egl_context; gpu->export_caps.tex = tex_handle_caps(gpu, false); gpu->import_caps.tex = tex_handle_caps(gpu, true); if (p->egl_dpy) { p->has_modifiers = epoxy_has_egl_extension(p->egl_dpy, "EGL_EXT_image_dma_buf_import_modifiers"); } #endif if (epoxy_has_gl_extension("GL_AMD_pinned_memory")) { gpu->import_caps.buf |= PL_HANDLE_HOST_PTR; gpu->limits.align_host_ptr = get_page_size(); } // Cache some internal capability checks p->has_stride = gl_test_ext(gpu, "GL_EXT_unpack_subimage", 11, 30); p->has_unpack_image_height = p->gl_ver >= 12 || p->gles_ver >= 30; p->has_vao = gl_test_ext(gpu, "GL_ARB_vertex_array_object", 30, 0); p->has_invalidate_fb = gl_test_ext(gpu, "GL_ARB_invalidate_subdata", 43, 30); p->has_invalidate_tex = gl_test_ext(gpu, "GL_ARB_invalidate_subdata", 43, 0); p->has_queries = gl_test_ext(gpu, "GL_ARB_timer_query", 33, 0); p->has_fbos = gl_test_ext(gpu, "GL_ARB_framebuffer_object", 30, 20); p->has_storage = gl_test_ext(gpu, "GL_ARB_shader_image_load_store", 42, 0); p->has_readback = p->has_fbos; if (p->has_readback && p->gles_ver) { GLuint fbo = 0, tex = 0; GLint read_type = 0, read_fmt = 0; glGenTextures(1, &tex); glBindTexture(GL_TEXTURE_2D, tex); glGenFramebuffers(1, &fbo); 
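// Readback probe: the calls below define a small 64x64 GL_R8 test image,
// attach it to the draw FBO, and query which format/type combination the
// implementation would return from glReadPixels for it. Anything other
// than GL_RED + GL_UNSIGNED_BYTE means single-channel readback would not
// be lossless on this driver, so `has_readback` is cleared again below.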
glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, 64, 64, 0, GL_RED, GL_UNSIGNED_BYTE, NULL); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex, 0); glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &read_type); glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &read_fmt); if (read_type != GL_UNSIGNED_BYTE || read_fmt != GL_RED) { PL_INFO(gpu, "GPU does not seem to support lossless texture " "readback, restricting readback capabilities! This is a " "GLES/driver limitation, there is little we can do to " "work around it."); p->has_readback = false; } glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); glBindTexture(GL_TEXTURE_2D, 0); glDeleteFramebuffers(1, &fbo); glDeleteTextures(1, &tex); } // We simply don't know, so make up some values limits->align_tex_xfer_offset = 32; limits->align_tex_xfer_pitch = 1; limits->fragment_queues = 1; limits->compute_queues = 1; if (gl_test_ext(gpu, "GL_EXT_unpack_subimage", 11, 30)) limits->align_tex_xfer_pitch = 4; if (!gl_check_err(gpu, "pl_gpu_create_gl")) { PL_WARN(gpu, "Encountered errors while detecting GPU capabilities... " "ignoring, but expect limitations/issues"); p->failed = false; } // Filter out error messages during format probing pl_log_level_cap(gpu->log, PL_LOG_INFO); bool formats_ok = gl_setup_formats(gpu); pl_log_level_cap(gpu->log, PL_LOG_NONE); if (!formats_ok) goto error; return pl_gpu_finalize(gpu); error: gl_gpu_destroy(gpu); return NULL; } void gl_buf_destroy(pl_gpu gpu, pl_buf buf) { if (!MAKE_CURRENT()) { PL_ERR(gpu, "Failed uninitializing buffer, leaking resources!"); return; } struct pl_buf_gl *buf_gl = PL_PRIV(buf); if (buf_gl->fence) glDeleteSync(buf_gl->fence); if (buf_gl->mapped) { glBindBuffer(GL_COPY_WRITE_BUFFER, buf_gl->buffer); glUnmapBuffer(GL_COPY_WRITE_BUFFER); glBindBuffer(GL_COPY_WRITE_BUFFER, 0); } glDeleteBuffers(1, &buf_gl->buffer); gl_check_err(gpu, "gl_buf_destroy"); RELEASE_CURRENT(); pl_free((void *) buf); } pl_buf gl_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { if (!MAKE_CURRENT()) return NULL; struct pl_buf *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_gl); buf->params = *params; buf->params.initial_data = NULL; struct pl_gl *p = PL_PRIV(gpu); struct pl_buf_gl *buf_gl = PL_PRIV(buf); buf_gl->id = ++p->buf_id; // Just use this since the generic GL_BUFFER doesn't work GLenum target = GL_ARRAY_BUFFER; const void *data = params->initial_data; size_t total_size = params->size; bool import = false; if (params->import_handle == PL_HANDLE_HOST_PTR) { const struct pl_shared_mem *shmem = ¶ms->shared_mem; target = GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD; data = shmem->handle.ptr; buf_gl->offset = shmem->offset; total_size = shmem->size; import = true; if (params->host_mapped) buf->data = (uint8_t *) data + buf_gl->offset; if (buf_gl->offset > 0 && params->drawable) { PL_ERR(gpu, "Cannot combine non-aligned host pointer imports with " "drawable (vertex) buffers! 
This is a design limitation, " "open an issue if you absolutely need this."); goto error; } } glGenBuffers(1, &buf_gl->buffer); glBindBuffer(target, buf_gl->buffer); if (gl_test_ext(gpu, "GL_ARB_buffer_storage", 44, 0) && !import) { GLbitfield mapflags = 0, storflags = 0; if (params->host_writable) storflags |= GL_DYNAMIC_STORAGE_BIT; if (params->host_mapped) { mapflags |= GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; } if (params->memory_type == PL_BUF_MEM_HOST) storflags |= GL_CLIENT_STORAGE_BIT; // hopefully this works glBufferStorage(target, total_size, data, storflags | mapflags); if (params->host_mapped) { buf_gl->mapped = true; buf->data = glMapBufferRange(target, buf_gl->offset, params->size, mapflags); if (!buf->data) { glBindBuffer(target, 0); if (!gl_check_err(gpu, "gl_buf_create: map")) PL_ERR(gpu, "Failed mapping buffer: unknown reason"); goto error; } } } else { // Make a random guess based on arbitrary criteria we can't know GLenum hint = GL_STREAM_DRAW; if (params->initial_data && !params->host_writable && !params->host_mapped) hint = GL_STATIC_DRAW; if (params->host_readable && !params->host_writable && !params->host_mapped) hint = GL_STREAM_READ; if (params->storable) hint = GL_DYNAMIC_COPY; glBufferData(target, total_size, data, hint); if (import && glGetError() == GL_INVALID_OPERATION) { PL_ERR(gpu, "Failed importing host pointer!"); goto error; } } glBindBuffer(target, 0); if (!gl_check_err(gpu, "gl_buf_create")) goto error; if (params->storable) { buf_gl->barrier = GL_BUFFER_UPDATE_BARRIER_BIT | // for buf_copy etc. GL_PIXEL_BUFFER_BARRIER_BIT | // for tex_upload GL_SHADER_STORAGE_BARRIER_BIT; if (params->host_mapped) buf_gl->barrier |= GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT; if (params->uniform) buf_gl->barrier |= GL_UNIFORM_BARRIER_BIT; if (params->drawable) buf_gl->barrier |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT; } RELEASE_CURRENT(); return buf; error: gl_buf_destroy(gpu, buf); RELEASE_CURRENT(); return NULL; } bool gl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout) { // Non-persistently mapped buffers are always implicitly reusable in OpenGL, // the implementation will create more buffers under the hood if needed. if (!buf->data) return false; if (!MAKE_CURRENT()) return true; // conservative guess struct pl_buf_gl *buf_gl = PL_PRIV(buf); if (buf_gl->fence) { GLenum res = glClientWaitSync(buf_gl->fence, timeout ? 
GL_SYNC_FLUSH_COMMANDS_BIT : 0, timeout); if (res == GL_ALREADY_SIGNALED || res == GL_CONDITION_SATISFIED) { glDeleteSync(buf_gl->fence); buf_gl->fence = NULL; } } gl_poll_callbacks(gpu); RELEASE_CURRENT(); return !!buf_gl->fence; } void gl_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data, size_t size) { if (!MAKE_CURRENT()) return; struct pl_buf_gl *buf_gl = PL_PRIV(buf); glBindBuffer(GL_ARRAY_BUFFER, buf_gl->buffer); glBufferSubData(GL_ARRAY_BUFFER, buf_gl->offset + offset, size, data); glBindBuffer(GL_ARRAY_BUFFER, 0); gl_check_err(gpu, "gl_buf_write"); RELEASE_CURRENT(); } bool gl_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest, size_t size) { if (!MAKE_CURRENT()) return false; struct pl_buf_gl *buf_gl = PL_PRIV(buf); glBindBuffer(GL_ARRAY_BUFFER, buf_gl->buffer); glGetBufferSubData(GL_ARRAY_BUFFER, buf_gl->offset + offset, size, dest); glBindBuffer(GL_ARRAY_BUFFER, 0); bool ok = gl_check_err(gpu, "gl_buf_read"); RELEASE_CURRENT(); return ok; } void gl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { if (!MAKE_CURRENT()) return; struct pl_buf_gl *src_gl = PL_PRIV(src); struct pl_buf_gl *dst_gl = PL_PRIV(dst); glBindBuffer(GL_COPY_READ_BUFFER, src_gl->buffer); glBindBuffer(GL_COPY_WRITE_BUFFER, dst_gl->buffer); glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, src_gl->offset + src_offset, dst_gl->offset + dst_offset, size); gl_check_err(gpu, "gl_buf_copy"); RELEASE_CURRENT(); } #define QUERY_OBJECT_NUM 8 struct pl_timer { GLuint query[QUERY_OBJECT_NUM]; int index_write; // next index to write to int index_read; // next index to read from }; static pl_timer gl_timer_create(pl_gpu gpu) { struct pl_gl *p = PL_PRIV(gpu); if (!p->has_queries || !MAKE_CURRENT()) return NULL; pl_timer timer = pl_zalloc_ptr(NULL, timer); glGenQueries(QUERY_OBJECT_NUM, timer->query); RELEASE_CURRENT(); return timer; } static void gl_timer_destroy(pl_gpu gpu, pl_timer timer) { if (!MAKE_CURRENT()) { PL_ERR(gpu, "Failed uninitializing timer, leaking resources!"); return; } glDeleteQueries(QUERY_OBJECT_NUM, timer->query); gl_check_err(gpu, "gl_timer_destroy"); RELEASE_CURRENT(); pl_free(timer); } static uint64_t gl_timer_query(pl_gpu gpu, pl_timer timer) { if (timer->index_read == timer->index_write) return 0; // no more unprocessed results if (!MAKE_CURRENT()) return 0; uint64_t res = 0; GLuint query = timer->query[timer->index_read]; int avail = 0; glGetQueryObjectiv(query, GL_QUERY_RESULT_AVAILABLE, &avail); if (!avail) goto done; glGetQueryObjectui64v(query, GL_QUERY_RESULT, &res); timer->index_read = (timer->index_read + 1) % QUERY_OBJECT_NUM; // fall through done: RELEASE_CURRENT(); return res; } void gl_timer_begin(pl_timer timer) { if (!timer) return; glBeginQuery(GL_TIME_ELAPSED, timer->query[timer->index_write]); } void gl_timer_end(pl_timer timer) { if (!timer) return; glEndQuery(GL_TIME_ELAPSED); timer->index_write = (timer->index_write + 1) % QUERY_OBJECT_NUM; if (timer->index_write == timer->index_read) { // forcibly drop the least recent result to make space timer->index_read = (timer->index_read + 1) % QUERY_OBJECT_NUM; } } static void gl_gpu_flush(pl_gpu gpu) { if (!MAKE_CURRENT()) return; glFlush(); gl_check_err(gpu, "gl_gpu_flush"); RELEASE_CURRENT(); } static void gl_gpu_finish(pl_gpu gpu) { if (!MAKE_CURRENT()) return; glFinish(); gl_check_err(gpu, "gl_gpu_finish"); RELEASE_CURRENT(); } static bool gl_gpu_is_failed(pl_gpu gpu) { struct pl_gl *gl = PL_PRIV(gpu); return gl->failed; } static const struct 
pl_gpu_fns pl_fns_gl = { .destroy = gl_gpu_destroy, .tex_create = gl_tex_create, .tex_destroy = gl_tex_destroy, .tex_invalidate = gl_tex_invalidate, .tex_clear_ex = gl_tex_clear_ex, .tex_blit = gl_tex_blit, .tex_upload = gl_tex_upload, .tex_download = gl_tex_download, .buf_create = gl_buf_create, .buf_destroy = gl_buf_destroy, .buf_write = gl_buf_write, .buf_read = gl_buf_read, .buf_copy = gl_buf_copy, .buf_poll = gl_buf_poll, .desc_namespace = gl_desc_namespace, .pass_create = gl_pass_create, .pass_destroy = gl_pass_destroy, .pass_run = gl_pass_run, .timer_create = gl_timer_create, .timer_destroy = gl_timer_destroy, .timer_query = gl_timer_query, .gpu_flush = gl_gpu_flush, .gpu_finish = gl_gpu_finish, .gpu_is_failed = gl_gpu_is_failed, }; libplacebo-v4.192.1/src/opengl/gpu.h000066400000000000000000000073261417677245700172250ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "../gpu.h" #include "common.h" // Thread safety: Unsafe, same as pl_gpu_destroy pl_gpu pl_gpu_create_gl(pl_log log, pl_opengl gl, const struct pl_opengl_params *params); // --- pl_gpu internal structs and functions struct pl_gl { struct pl_gpu_fns impl; pl_opengl gl; bool failed; #ifdef EPOXY_HAS_EGL // For import/export EGLDisplay egl_dpy; EGLContext egl_ctx; # ifdef PL_HAVE_UNIX // List of formats supported by EGL_EXT_image_dma_buf_import PL_ARRAY(EGLint) egl_formats; # endif #endif //!EPOXY_HAS_EGL // Sync objects and associated callbacks PL_ARRAY(struct gl_cb) callbacks; // Incrementing counters to keep track of object uniqueness int buf_id; // Cached capabilities int gl_ver; int gles_ver; bool has_fbos; bool has_storage; bool has_stride; bool has_unpack_image_height; bool has_invalidate_fb; bool has_invalidate_tex; bool has_vao; bool has_queries; bool has_modifiers; bool has_readback; int gather_comps; }; void gl_timer_begin(pl_timer timer); void gl_timer_end(pl_timer timer); static inline bool _make_current(pl_gpu gpu) { struct pl_gl *p = PL_PRIV(gpu); if (!gl_make_current(p->gl)) { p->failed = true; return false; } return true; } static inline void _release_current(pl_gpu gpu) { struct pl_gl *p = PL_PRIV(gpu); gl_release_current(p->gl); } #define MAKE_CURRENT() _make_current(gpu) #define RELEASE_CURRENT() _release_current(gpu) struct pl_tex_gl { GLenum target; GLuint texture; bool wrapped_tex; GLuint fbo; // or 0 bool wrapped_fb; GLbitfield barrier; // GL format fields GLenum format; GLint iformat; GLenum type; // For imported/exported textures #ifdef EPOXY_HAS_EGL EGLImageKHR image; #endif int fd; }; pl_tex gl_tex_create(pl_gpu, const struct pl_tex_params *); void gl_tex_destroy(pl_gpu, pl_tex); void gl_tex_invalidate(pl_gpu, pl_tex); void gl_tex_clear_ex(pl_gpu, pl_tex, const union pl_clear_color); void gl_tex_blit(pl_gpu, const struct pl_tex_blit_params *); bool gl_tex_upload(pl_gpu, const struct pl_tex_transfer_params *); bool gl_tex_download(pl_gpu, const 
struct pl_tex_transfer_params *); struct pl_buf_gl { uint64_t id; // unique per buffer GLuint buffer; size_t offset; GLsync fence; GLbitfield barrier; bool mapped; }; pl_buf gl_buf_create(pl_gpu, const struct pl_buf_params *); void gl_buf_destroy(pl_gpu, pl_buf); void gl_buf_write(pl_gpu, pl_buf, size_t offset, const void *src, size_t size); bool gl_buf_read(pl_gpu, pl_buf, size_t offset, void *dst, size_t size); void gl_buf_copy(pl_gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size); bool gl_buf_poll(pl_gpu, pl_buf, uint64_t timeout); struct pl_pass_gl; int gl_desc_namespace(pl_gpu, enum pl_desc_type type); pl_pass gl_pass_create(pl_gpu, const struct pl_pass_params *); void gl_pass_destroy(pl_gpu, pl_pass); void gl_pass_run(pl_gpu, const struct pl_pass_run_params *); libplacebo-v4.192.1/src/opengl/gpu_pass.c000066400000000000000000000565571417677245700202600ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "formats.h" #include "utils.h" int gl_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { return (int) type; } #define CACHE_MAGIC {'P','L','G','L'} #define CACHE_VERSION 1 static const char gl_cache_magic[4] = CACHE_MAGIC; struct gl_cache_header { char magic[sizeof(gl_cache_magic)]; int cache_version; GLenum format; }; static GLuint load_cached_program(pl_gpu gpu, const struct pl_pass_params *params) { if (!gl_test_ext(gpu, "GL_ARB_get_program_binary", 41, 30)) return 0; pl_str cache = { .buf = (void *) params->cached_program, .len = params->cached_program_len, }; if (cache.len < sizeof(struct gl_cache_header)) return false; struct gl_cache_header *header = (struct gl_cache_header *) cache.buf; cache = pl_str_drop(cache, sizeof(*header)); if (strncmp(header->magic, gl_cache_magic, sizeof(gl_cache_magic)) != 0) return 0; if (header->cache_version != CACHE_VERSION) return 0; GLuint prog = glCreateProgram(); if (!gl_check_err(gpu, "load_cached_program: glCreateProgram")) return 0; glProgramBinary(prog, header->format, cache.buf, cache.len); glGetError(); // discard potential useless error GLint status = 0; glGetProgramiv(prog, GL_LINK_STATUS, &status); if (status) return prog; glDeleteProgram(prog); gl_check_err(gpu, "load_cached_program: glProgramBinary"); return 0; } static enum pl_log_level gl_log_level(GLint status, GLint log_length) { if (!status) { return PL_LOG_ERR; } else if (log_length > 0) { return PL_LOG_INFO; } else { return PL_LOG_DEBUG; } } static bool gl_attach_shader(pl_gpu gpu, GLuint program, GLenum type, const char *src) { GLuint shader = glCreateShader(type); glShaderSource(shader, 1, &src, NULL); glCompileShader(shader); GLint status = 0; glGetShaderiv(shader, GL_COMPILE_STATUS, &status); GLint log_length = 0; glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); enum pl_log_level level = gl_log_level(status, log_length); if (pl_msg_test(gpu->log, level)) { static const char 
*shader_name; switch (type) { case GL_VERTEX_SHADER: shader_name = "vertex"; break; case GL_FRAGMENT_SHADER: shader_name = "fragment"; break; case GL_COMPUTE_SHADER: shader_name = "compute"; break; default: pl_unreachable(); }; PL_MSG(gpu, level, "%s shader source:", shader_name); pl_msg_source(gpu->log, level, src); GLchar *logstr = pl_zalloc(NULL, log_length + 1); glGetShaderInfoLog(shader, log_length, NULL, logstr); PL_MSG(gpu, level, "shader compile log (status=%d): %s", status, logstr); pl_free(logstr); } if (!status || !gl_check_err(gpu, "gl_attach_shader")) goto error; glAttachShader(program, shader); glDeleteShader(shader); return true; error: glDeleteShader(shader); return false; } static GLuint gl_compile_program(pl_gpu gpu, const struct pl_pass_params *params) { GLuint prog = glCreateProgram(); bool ok = true; switch (params->type) { case PL_PASS_COMPUTE: ok &= gl_attach_shader(gpu, prog, GL_COMPUTE_SHADER, params->glsl_shader); break; case PL_PASS_RASTER: ok &= gl_attach_shader(gpu, prog, GL_VERTEX_SHADER, params->vertex_shader); ok &= gl_attach_shader(gpu, prog, GL_FRAGMENT_SHADER, params->glsl_shader); for (int i = 0; i < params->num_vertex_attribs; i++) glBindAttribLocation(prog, i, params->vertex_attribs[i].name); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } if (!ok || !gl_check_err(gpu, "gl_compile_program: attach shader")) goto error; glLinkProgram(prog); GLint status = 0; glGetProgramiv(prog, GL_LINK_STATUS, &status); GLint log_length = 0; glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &log_length); enum pl_log_level level = gl_log_level(status, log_length); if (pl_msg_test(gpu->log, level)) { GLchar *logstr = pl_zalloc(NULL, log_length + 1); glGetProgramInfoLog(prog, log_length, NULL, logstr); PL_MSG(gpu, level, "shader link log (status=%d): %s", status, logstr); pl_free(logstr); } if (!gl_check_err(gpu, "gl_compile_program: link program")) goto error; return prog; error: glDeleteProgram(prog); PL_ERR(gpu, "Failed compiling/linking GLSL program"); return 0; } // For pl_pass.priv struct pl_pass_gl { GLuint program; GLuint vao; // the VAO object uint64_t vao_id; // buf_gl.id of VAO size_t vao_offset; // VBO offset of VAO GLuint buffer; // VBO for raw vertex pointers GLuint index_buffer; GLint *var_locs; }; void gl_pass_destroy(pl_gpu gpu, pl_pass pass) { if (!MAKE_CURRENT()) { PL_ERR(gpu, "Failed uninitializing pass, leaking resources!"); return; } struct pl_pass_gl *pass_gl = PL_PRIV(pass); if (pass_gl->vao) glDeleteVertexArrays(1, &pass_gl->vao); glDeleteBuffers(1, &pass_gl->index_buffer); glDeleteBuffers(1, &pass_gl->buffer); glDeleteProgram(pass_gl->program); gl_check_err(gpu, "gl_pass_destroy"); RELEASE_CURRENT(); pl_free((void *) pass); } static void gl_update_va(pl_pass pass, size_t vbo_offset) { for (int i = 0; i < pass->params.num_vertex_attribs; i++) { const struct pl_vertex_attrib *va = &pass->params.vertex_attribs[i]; const struct gl_format **glfmtp = PL_PRIV(va->fmt); const struct gl_format *glfmt = *glfmtp; bool norm = false; switch (va->fmt->type) { case PL_FMT_UNORM: case PL_FMT_SNORM: norm = true; break; case PL_FMT_UNKNOWN: case PL_FMT_FLOAT: case PL_FMT_UINT: case PL_FMT_SINT: break; case PL_FMT_TYPE_COUNT: pl_unreachable(); } glEnableVertexAttribArray(i); glVertexAttribPointer(i, va->fmt->num_components, glfmt->type, norm, pass->params.vertex_stride, (void *) (va->offset + vbo_offset)); } } pl_pass gl_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { if (!MAKE_CURRENT()) return NULL; struct pl_gl *p = PL_PRIV(gpu); 
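    // Pass creation proceeds in several steps: load a previously cached
    // program binary if one was supplied (otherwise compile and link the GLSL
    // from scratch), re-serialize the binary into pass->params.cached_program
    // for the caller, resolve uniform locations for all input variables,
    // pre-bind descriptor texture/image units, and finally set up the VAO/VBO
    // used by raster passes.
    //
    // Cached program binaries are prefixed with the small gl_cache_header
    // defined above: the "PLGL" magic, a cache version, and the GLenum binary
    // format reported by glGetProgramBinary, followed by the raw driver blob.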
struct pl_pass *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_gl); struct pl_pass_gl *pass_gl = PL_PRIV(pass); pass->params = pl_pass_params_copy(pass, params); // Load/Compile program if ((pass_gl->program = load_cached_program(gpu, params))) { PL_DEBUG(gpu, "Using cached GL program"); } else { clock_t start = clock(); pass_gl->program = gl_compile_program(gpu, params); pl_log_cpu_time(gpu->log, start, clock(), "compiling shader"); } if (!pass_gl->program) goto error; // Update program cache if possible if (gl_test_ext(gpu, "GL_ARB_get_program_binary", 41, 30)) { GLint size = 0; glGetProgramiv(pass_gl->program, GL_PROGRAM_BINARY_LENGTH, &size); if (size > 0) { uint8_t *buffer = pl_alloc(NULL, size); GLsizei actual_size = 0; struct gl_cache_header header = { .magic = CACHE_MAGIC, .cache_version = CACHE_VERSION, }; glGetProgramBinary(pass_gl->program, size, &actual_size, &header.format, buffer); if (actual_size > 0) { pl_str cache = {0}; pl_str_append(pass, &cache, (pl_str) { (void *) &header, sizeof(header) }); pl_str_append(pass, &cache, (pl_str) { buffer, actual_size }); pass->params.cached_program = cache.buf; pass->params.cached_program_len = cache.len; } pl_free(buffer); } if (!gl_check_err(gpu, "gl_pass_create: get program binary")) { PL_WARN(gpu, "Failed generating program binary.. ignoring"); pl_free((void *) pass->params.cached_program); pass->params.cached_program = NULL; pass->params.cached_program_len = 0; } } glUseProgram(pass_gl->program); pass_gl->var_locs = pl_calloc(pass, params->num_variables, sizeof(GLint)); for (int i = 0; i < params->num_variables; i++) { pass_gl->var_locs[i] = glGetUniformLocation(pass_gl->program, params->variables[i].name); // Due to OpenGL API restrictions, we need to ensure that this is a // variable type we can actually *update*. 
Fortunately, this is easily // checked by virtue of the fact that all legal combinations of // parameters will have a valid GLSL type name if (!pl_var_glsl_type_name(params->variables[i])) { glUseProgram(0); PL_ERR(gpu, "Input variable '%s' does not match any known type!", params->variables[i].name); goto error; } } for (int i = 0; i < params->num_descriptors; i++) { // For compatibility with older OpenGL, we need to explicitly update // the texture/image unit bindings after creating the shader program, // since specifying it directly requires GLSL 4.20+ GLint loc = glGetUniformLocation(pass_gl->program, params->descriptors[i].name); glUniform1i(loc, params->descriptors[i].binding); } glUseProgram(0); // Initialize the VAO and single vertex buffer glGenBuffers(1, &pass_gl->buffer); if (p->has_vao) { glGenVertexArrays(1, &pass_gl->vao); glBindBuffer(GL_ARRAY_BUFFER, pass_gl->buffer); glBindVertexArray(pass_gl->vao); gl_update_va(pass, 0); glBindVertexArray(0); glBindBuffer(GL_ARRAY_BUFFER, 0); } if (!gl_check_err(gpu, "gl_pass_create")) goto error; RELEASE_CURRENT(); return pass; error: PL_ERR(gpu, "Failed creating pass"); gl_pass_destroy(gpu, pass); RELEASE_CURRENT(); return NULL; } static void update_var(pl_pass pass, const struct pl_var_update *vu) { struct pl_pass_gl *pass_gl = PL_PRIV(pass); const struct pl_var *var = &pass->params.variables[vu->index]; GLint loc = pass_gl->var_locs[vu->index]; switch (var->type) { case PL_VAR_SINT: { const int *i = vu->data; pl_assert(var->dim_m == 1); switch (var->dim_v) { case 1: glUniform1iv(loc, var->dim_a, i); break; case 2: glUniform2iv(loc, var->dim_a, i); break; case 3: glUniform3iv(loc, var->dim_a, i); break; case 4: glUniform4iv(loc, var->dim_a, i); break; default: pl_unreachable(); } return; } case PL_VAR_UINT: { const unsigned int *u = vu->data; pl_assert(var->dim_m == 1); switch (var->dim_v) { case 1: glUniform1uiv(loc, var->dim_a, u); break; case 2: glUniform2uiv(loc, var->dim_a, u); break; case 3: glUniform3uiv(loc, var->dim_a, u); break; case 4: glUniform4uiv(loc, var->dim_a, u); break; default: pl_unreachable(); } return; } case PL_VAR_FLOAT: { const float *f = vu->data; if (var->dim_m == 1) { switch (var->dim_v) { case 1: glUniform1fv(loc, var->dim_a, f); break; case 2: glUniform2fv(loc, var->dim_a, f); break; case 3: glUniform3fv(loc, var->dim_a, f); break; case 4: glUniform4fv(loc, var->dim_a, f); break; default: pl_unreachable(); } } else if (var->dim_m == 2 && var->dim_v == 2) { glUniformMatrix2fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 3 && var->dim_v == 3) { glUniformMatrix3fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 4 && var->dim_v == 4) { glUniformMatrix4fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 2 && var->dim_v == 3) { glUniformMatrix2x3fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 3 && var->dim_v == 2) { glUniformMatrix3x2fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 2 && var->dim_v == 4) { glUniformMatrix2x4fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 4 && var->dim_v == 2) { glUniformMatrix4x2fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 3 && var->dim_v == 4) { glUniformMatrix3x4fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 4 && var->dim_v == 3) { glUniformMatrix4x3fv(loc, var->dim_a, GL_FALSE, f); } else { pl_unreachable(); } return; } case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: break; } pl_unreachable(); } static void update_desc(pl_pass pass, int index, const struct pl_desc_binding *db) { const struct 
pl_desc *desc = &pass->params.descriptors[index]; static const GLenum access[] = { [PL_DESC_ACCESS_READWRITE] = GL_READ_WRITE, [PL_DESC_ACCESS_READONLY] = GL_READ_ONLY, [PL_DESC_ACCESS_WRITEONLY] = GL_WRITE_ONLY, }; static const GLint wraps[PL_TEX_ADDRESS_MODE_COUNT] = { [PL_TEX_ADDRESS_CLAMP] = GL_CLAMP_TO_EDGE, [PL_TEX_ADDRESS_REPEAT] = GL_REPEAT, [PL_TEX_ADDRESS_MIRROR] = GL_MIRRORED_REPEAT, }; static const GLint filters[PL_TEX_SAMPLE_MODE_COUNT] = { [PL_TEX_SAMPLE_NEAREST] = GL_NEAREST, [PL_TEX_SAMPLE_LINEAR] = GL_LINEAR, }; switch (desc->type) { case PL_DESC_SAMPLED_TEX: { pl_tex tex = db->object; struct pl_tex_gl *tex_gl = PL_PRIV(tex); glActiveTexture(GL_TEXTURE0 + desc->binding); glBindTexture(tex_gl->target, tex_gl->texture); GLint filter = filters[db->sample_mode]; GLint wrap = wraps[db->address_mode]; glTexParameteri(tex_gl->target, GL_TEXTURE_MIN_FILTER, filter); glTexParameteri(tex_gl->target, GL_TEXTURE_MAG_FILTER, filter); switch (pl_tex_params_dimension(tex->params)) { case 3: glTexParameteri(tex_gl->target, GL_TEXTURE_WRAP_R, wrap); // fall through case 2: glTexParameteri(tex_gl->target, GL_TEXTURE_WRAP_T, wrap); // fall through case 1: glTexParameteri(tex_gl->target, GL_TEXTURE_WRAP_S, wrap); break; } return; } case PL_DESC_STORAGE_IMG: { pl_tex tex = db->object; struct pl_tex_gl *tex_gl = PL_PRIV(tex); glBindImageTexture(desc->binding, tex_gl->texture, 0, GL_FALSE, 0, access[desc->access], tex_gl->iformat); return; } case PL_DESC_BUF_UNIFORM: { pl_buf buf = db->object; struct pl_buf_gl *buf_gl = PL_PRIV(buf); glBindBufferRange(GL_UNIFORM_BUFFER, desc->binding, buf_gl->buffer, buf_gl->offset, buf->params.size); return; } case PL_DESC_BUF_STORAGE: { pl_buf buf = db->object; struct pl_buf_gl *buf_gl = PL_PRIV(buf); glBindBufferRange(GL_SHADER_STORAGE_BUFFER, desc->binding, buf_gl->buffer, buf_gl->offset, buf->params.size); return; } case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: assert(!"unimplemented"); // TODO case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: break; } pl_unreachable(); } static void unbind_desc(pl_pass pass, int index, const struct pl_desc_binding *db) { const struct pl_desc *desc = &pass->params.descriptors[index]; switch (desc->type) { case PL_DESC_SAMPLED_TEX: { pl_tex tex = db->object; struct pl_tex_gl *tex_gl = PL_PRIV(tex); glActiveTexture(GL_TEXTURE0 + desc->binding); glBindTexture(tex_gl->target, 0); return; } case PL_DESC_STORAGE_IMG: { pl_tex tex = db->object; struct pl_tex_gl *tex_gl = PL_PRIV(tex); glBindImageTexture(desc->binding, 0, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32F); if (desc->access != PL_DESC_ACCESS_READONLY) glMemoryBarrier(tex_gl->barrier); return; } case PL_DESC_BUF_UNIFORM: glBindBufferBase(GL_UNIFORM_BUFFER, desc->binding, 0); return; case PL_DESC_BUF_STORAGE: { pl_buf buf = db->object; struct pl_buf_gl *buf_gl = PL_PRIV(buf); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, desc->binding, 0); if (desc->access != PL_DESC_ACCESS_READONLY) glMemoryBarrier(buf_gl->barrier); return; } case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: assert(!"unimplemented"); // TODO case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: break; } pl_unreachable(); } void gl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) { if (!MAKE_CURRENT()) return; pl_pass pass = params->pass; struct pl_pass_gl *pass_gl = PL_PRIV(pass); struct pl_gl *p = PL_PRIV(gpu); glUseProgram(pass_gl->program); for (int i = 0; i < params->num_var_updates; i++) update_var(pass, ¶ms->var_updates[i]); for (int i = 0; i < pass->params.num_descriptors; 
i++) update_desc(pass, i, ¶ms->desc_bindings[i]); glActiveTexture(GL_TEXTURE0); if (!gl_check_err(gpu, "gl_pass_run: updating uniforms")) { RELEASE_CURRENT(); return; } switch (pass->params.type) { case PL_PASS_RASTER: { struct pl_tex_gl *target_gl = PL_PRIV(params->target); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, target_gl->fbo); if (!pass->params.load_target && p->has_invalidate_fb) { GLenum fb = target_gl->fbo ? GL_COLOR_ATTACHMENT0 : GL_COLOR; glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &fb); } glViewport(params->viewport.x0, params->viewport.y0, pl_rect_w(params->viewport), pl_rect_h(params->viewport)); glScissor(params->scissors.x0, params->scissors.y0, pl_rect_w(params->scissors), pl_rect_h(params->scissors)); glEnable(GL_SCISSOR_TEST); glDisable(GL_DEPTH_TEST); glDisable(GL_CULL_FACE); gl_check_err(gpu, "gl_pass_run: enabling viewport/scissor"); const struct pl_blend_params *blend = pass->params.blend_params; if (blend) { static const GLenum map_blend[] = { [PL_BLEND_ZERO] = GL_ZERO, [PL_BLEND_ONE] = GL_ONE, [PL_BLEND_SRC_ALPHA] = GL_SRC_ALPHA, [PL_BLEND_ONE_MINUS_SRC_ALPHA] = GL_ONE_MINUS_SRC_ALPHA, }; glBlendFuncSeparate(map_blend[blend->src_rgb], map_blend[blend->dst_rgb], map_blend[blend->src_alpha], map_blend[blend->dst_alpha]); glEnable(GL_BLEND); gl_check_err(gpu, "gl_pass_run: enabling blend"); } // Update VBO and VAO pl_buf vert = params->vertex_buf; struct pl_buf_gl *vert_gl = vert ? PL_PRIV(vert) : NULL; glBindBuffer(GL_ARRAY_BUFFER, vert ? vert_gl->buffer : pass_gl->buffer); if (!vert) { // Update the buffer directly. In theory we could also do a memcmp // cache here to avoid unnecessary updates. glBufferData(GL_ARRAY_BUFFER, pl_vertex_buf_size(params), params->vertex_data, GL_STREAM_DRAW); } if (pass_gl->vao) glBindVertexArray(pass_gl->vao); uint64_t vert_id = vert ? vert_gl->id : 0; size_t vert_offset = vert ? 
params->buf_offset : 0; if (!pass_gl->vao || pass_gl->vao_id != vert_id || pass_gl->vao_offset != vert_offset) { // We need to update the VAO when the buffer ID or offset changes gl_update_va(pass, vert_offset); pass_gl->vao_id = vert_id; pass_gl->vao_offset = vert_offset; } gl_check_err(gpu, "gl_pass_run: update/bind vertex buffer"); static const GLenum map_prim[PL_PRIM_TYPE_COUNT] = { [PL_PRIM_TRIANGLE_LIST] = GL_TRIANGLES, [PL_PRIM_TRIANGLE_STRIP] = GL_TRIANGLE_STRIP, }; GLenum mode = map_prim[pass->params.vertex_type]; gl_timer_begin(params->timer); if (params->index_data) { static const GLenum index_fmts[PL_INDEX_FORMAT_COUNT] = { [PL_INDEX_UINT16] = GL_UNSIGNED_SHORT, [PL_INDEX_UINT32] = GL_UNSIGNED_INT, }; // Upload indices to temporary buffer object if (!pass_gl->index_buffer) glGenBuffers(1, &pass_gl->index_buffer); // lazily allocated glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, pass_gl->index_buffer); glBufferData(GL_ELEMENT_ARRAY_BUFFER, pl_index_buf_size(params), params->index_data, GL_STREAM_DRAW); glDrawElements(mode, params->vertex_count, index_fmts[params->index_fmt], 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } else if (params->index_buf) { // The pointer argument becomes the index buffer offset struct pl_buf_gl *index_gl = PL_PRIV(params->index_buf); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_gl->buffer); glDrawElements(mode, params->vertex_count, GL_UNSIGNED_SHORT, (void *) params->index_offset); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } else { // Note: the VBO offset is handled in the VAO glDrawArrays(mode, 0, params->vertex_count); } gl_timer_end(params->timer); gl_check_err(gpu, "gl_pass_run: drawing"); if (pass_gl->vao) { glBindVertexArray(0); } else { for (int i = 0; i < pass->params.num_vertex_attribs; i++) glDisableVertexAttribArray(i); } glBindBuffer(GL_ARRAY_BUFFER, 0); glDisable(GL_SCISSOR_TEST); glDisable(GL_BLEND); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); break; } case PL_PASS_COMPUTE: gl_timer_begin(params->timer); glDispatchCompute(params->compute_groups[0], params->compute_groups[1], params->compute_groups[2]); gl_timer_end(params->timer); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } for (int i = 0; i < pass->params.num_descriptors; i++) unbind_desc(pass, i, ¶ms->desc_bindings[i]); glActiveTexture(GL_TEXTURE0); glUseProgram(0); gl_check_err(gpu, "gl_pass_run"); RELEASE_CURRENT(); } libplacebo-v4.192.1/src/opengl/gpu_tex.c000066400000000000000000001060331417677245700200730ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" #include "formats.h" #include "utils.h" #ifdef PL_HAVE_UNIX #include #include #endif void gl_tex_destroy(pl_gpu gpu, pl_tex tex) { if (!MAKE_CURRENT()) { PL_ERR(gpu, "Failed uninitializing texture, leaking resources!"); return; } struct pl_tex_gl *tex_gl = PL_PRIV(tex); if (tex_gl->fbo && !tex_gl->wrapped_fb) glDeleteFramebuffers(1, &tex_gl->fbo); #ifdef EPOXY_HAS_EGL if (tex_gl->image) { struct pl_gl *p = PL_PRIV(gpu); eglDestroyImageKHR(p->egl_dpy, tex_gl->image); } #endif if (!tex_gl->wrapped_tex) glDeleteTextures(1, &tex_gl->texture); #ifdef PL_HAVE_UNIX if (tex_gl->fd != -1) close(tex_gl->fd); #endif gl_check_err(gpu, "gl_tex_destroy"); RELEASE_CURRENT(); pl_free((void *) tex); } static GLbitfield tex_barrier(pl_tex tex) { GLbitfield barrier = 0; const struct pl_tex_params *params = &tex->params; if (params->sampleable) barrier |= GL_TEXTURE_FETCH_BARRIER_BIT; if (params->renderable || params->blit_src || params->blit_dst) barrier |= GL_FRAMEBUFFER_BARRIER_BIT; if (params->storable) barrier |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT; if (params->host_writable || params->host_readable) barrier |= GL_TEXTURE_UPDATE_BARRIER_BIT; return barrier; } #ifdef EPOXY_HAS_EGL #define ADD_ATTRIB(name, value) \ do { \ assert(num_attribs + 3 < PL_ARRAY_SIZE(attribs)); \ attribs[num_attribs++] = (name); \ attribs[num_attribs++] = (value); \ } while (0) #define ADD_DMABUF_PLANE_ATTRIBS(plane, fd, offset, stride) \ do { \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _FD_EXT, \ fd); \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _OFFSET_EXT, \ offset); \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _PITCH_EXT, \ stride); \ } while (0) #define ADD_DMABUF_PLANE_MODIFIERS(plane, mod) \ do { \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_LO_EXT, \ (uint32_t) ((mod) & 0xFFFFFFFFlu)); \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_HI_EXT, \ (uint32_t) (((mod) >> 32u) & 0xFFFFFFFFlu)); \ } while (0) static bool gl_tex_import(pl_gpu gpu, enum pl_handle_type handle_type, const struct pl_shared_mem *shared_mem, struct pl_tex *tex) { if (!MAKE_CURRENT()) return false; struct pl_gl *p = PL_PRIV(gpu); struct pl_tex_gl *tex_gl = PL_PRIV(tex); const struct pl_tex_params *params = &tex->params; int attribs[20] = {}; int num_attribs = 0; ADD_ATTRIB(EGL_WIDTH, params->w); ADD_ATTRIB(EGL_HEIGHT, params->h); switch (handle_type) { #ifdef PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: if (shared_mem->handle.fd == -1) { PL_ERR(gpu, "%s: invalid fd", __func__); goto error; } tex_gl->fd = dup(shared_mem->handle.fd); if (tex_gl->fd == -1) { PL_ERR(gpu, "%s: cannot duplicate fd %d for importing: %s", __func__, shared_mem->handle.fd, strerror(errno)); goto error; } ADD_ATTRIB(EGL_LINUX_DRM_FOURCC_EXT, params->format->fourcc); ADD_DMABUF_PLANE_ATTRIBS(0, tex_gl->fd, shared_mem->offset, PL_DEF(shared_mem->stride_w, params->w)); if (p->has_modifiers) ADD_DMABUF_PLANE_MODIFIERS(0, shared_mem->drm_format_mod); attribs[num_attribs] = EGL_NONE; // EGL_LINUX_DMA_BUF_EXT requires EGL_NO_CONTEXT tex_gl->image = eglCreateImageKHR(p->egl_dpy, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, (EGLClientBuffer) NULL, attribs); break; #endif // PL_HAVE_UNIX case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: case PL_HANDLE_HOST_PTR: case PL_HANDLE_FD: pl_unreachable(); } if (!egl_check_err(gpu, "eglCreateImageKHR") || !tex_gl->image) goto error; // tex_gl->image should be already bound glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, tex_gl->image); if (!egl_check_err(gpu, "EGLImageTargetTexture2DOES")) goto error; RELEASE_CURRENT(); return true; error: 
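    // Import failed; the caller (gl_tex_create) tears the texture down via
    // gl_tex_destroy(), which also destroys the EGLImage and closes the
    // dup()'d fd, if any.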
PL_ERR(gpu, "Failed importing GL texture!"); RELEASE_CURRENT(); return false; } static EGLenum egl_from_gl_target(pl_gpu gpu, int target) { switch(target) { case GL_TEXTURE_2D: return EGL_GL_TEXTURE_2D; case GL_TEXTURE_3D: return EGL_GL_TEXTURE_3D; default: PL_ERR(gpu, "%s: unsupported texture target 0x%x", __func__, target); return 0; } } static bool gl_tex_export(pl_gpu gpu, enum pl_handle_type handle_type, bool preserved, struct pl_tex *tex) { struct pl_tex_gl *tex_gl = PL_PRIV(tex); struct pl_gl *p = PL_PRIV(gpu); struct pl_shared_mem *shared_mem = &tex->shared_mem; bool ok; EGLenum egltarget = egl_from_gl_target(gpu, tex_gl->target); if (!egltarget) goto error; int attribs[] = { EGL_IMAGE_PRESERVED, preserved, EGL_NONE, }; // We assume that tex_gl->texture is already bound tex_gl->image = eglCreateImageKHR(p->egl_dpy, p->egl_ctx, egltarget, (EGLClientBuffer) (uintptr_t) tex_gl->texture, attribs); if (!egl_check_err(gpu, "eglCreateImageKHR") || !tex_gl->image) goto error; switch (handle_type) { #ifdef PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: { int fourcc = 0; int num_planes = 0; EGLuint64KHR modifier = 0; ok = eglExportDMABUFImageQueryMESA(p->egl_dpy, tex_gl->image, &fourcc, &num_planes, &modifier); if (!egl_check_err(gpu, "eglExportDMABUFImageQueryMESA") || !ok) goto error; if (fourcc != tex->params.format->fourcc) { PL_ERR(gpu, "Exported DRM format %s does not match fourcc of " "specified pl_fmt %s? Please open a bug.", PRINT_FOURCC(fourcc), PRINT_FOURCC(tex->params.format->fourcc)); goto error; } if (num_planes != 1) { PL_ERR(gpu, "Unsupported number of planes: %d", num_planes); goto error; } int offset = 0, stride = 0; ok = eglExportDMABUFImageMESA(p->egl_dpy, tex_gl->image, &tex_gl->fd, &stride, &offset); if (!egl_check_err(gpu, "eglExportDMABUFImageMesa") || !ok) goto error; off_t fdsize = lseek(tex_gl->fd, 0, SEEK_END); off_t err = fdsize > 0 && lseek(tex_gl->fd, 0, SEEK_SET); if (fdsize <= 0 || err < 0) { PL_ERR(gpu, "Failed querying FD size: %s", strerror(errno)); goto error; } *shared_mem = (struct pl_shared_mem) { .handle.fd = tex_gl->fd, .size = fdsize, .offset = offset, .drm_format_mod = modifier, .stride_w = stride, }; break; } #endif // PL_HAVE_UNIX case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: case PL_HANDLE_HOST_PTR: case PL_HANDLE_FD: pl_unreachable(); } return true; error: PL_ERR(gpu, "Failed exporting GL texture!"); return false; } #else // !EPOXY_HAS_EGL static bool gl_tex_import(pl_gpu gpu, enum pl_handle_type handle_type, const struct pl_shared_mem *shared_mem, struct pl_tex *tex) { abort(); // no implementations } static bool gl_tex_export(pl_gpu gpu, enum pl_handle_type handle_type, bool preserved, struct pl_tex *tex) { abort(); // no implementations } #endif // EPOXY_HAS_EGL static const char *fb_err_str(GLenum err) { switch (err) { #define CASE(name) case name: return #name CASE(GL_FRAMEBUFFER_COMPLETE); CASE(GL_FRAMEBUFFER_UNDEFINED); CASE(GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT); CASE(GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT); CASE(GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS); CASE(GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER); CASE(GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER); CASE(GL_FRAMEBUFFER_UNSUPPORTED); CASE(GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE); CASE(GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS); #undef CASE default: return "unknown error"; } } pl_tex gl_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { if (!MAKE_CURRENT()) return NULL; struct pl_gl *p = PL_PRIV(gpu); struct pl_tex *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_gl); tex->params = *params; 
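    // Don't retain the caller's initial_data pointer in the stored params:
    // it is consumed by the glTexImage*() calls during creation and is not
    // required to remain valid afterwards.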
tex->params.initial_data = NULL; tex->sampler_type = PL_SAMPLER_NORMAL; struct pl_tex_gl *tex_gl = PL_PRIV(tex); const struct gl_format **fmtp = PL_PRIV(params->format); const struct gl_format *fmt = *fmtp; *tex_gl = (struct pl_tex_gl) { .format = fmt->fmt, .iformat = fmt->ifmt, .type = fmt->type, .barrier = tex_barrier(tex), .fd = -1, }; static const GLint targets[] = { [1] = GL_TEXTURE_1D, [2] = GL_TEXTURE_2D, [3] = GL_TEXTURE_3D, }; int dims = pl_tex_params_dimension(*params); pl_assert(dims >= 1 && dims <= 3); tex_gl->target = targets[dims]; glGenTextures(1, &tex_gl->texture); glBindTexture(tex_gl->target, tex_gl->texture); if (params->import_handle) { if (!gl_tex_import(gpu, params->import_handle, ¶ms->shared_mem, tex)) goto error; } else { glPixelStorei(GL_UNPACK_ALIGNMENT, 1); switch (dims) { case 1: glTexImage1D(tex_gl->target, 0, tex_gl->iformat, params->w, 0, tex_gl->format, tex_gl->type, params->initial_data); break; case 2: glTexImage2D(tex_gl->target, 0, tex_gl->iformat, params->w, params->h, 0, tex_gl->format, tex_gl->type, params->initial_data); break; case 3: glTexImage3D(tex_gl->target, 0, tex_gl->iformat, params->w, params->h, params->d, 0, tex_gl->format, tex_gl->type, params->initial_data); break; } glPixelStorei(GL_UNPACK_ALIGNMENT, 4); } if (params->export_handle) { if (!gl_tex_export(gpu, params->export_handle, params->initial_data, tex)) goto error; } glBindTexture(tex_gl->target, 0); if (!gl_check_err(gpu, "gl_tex_create: texture")) goto error; bool need_fbo = tex->params.renderable; if (tex->params.blit_src || tex->params.blit_dst) { if (dims != 2) { PL_ERR(gpu, "Blittable textures may only be 2D!"); goto error; } need_fbo = true; } bool can_fbo = tex->params.format->caps & PL_FMT_CAP_RENDERABLE && tex->params.d == 0; // Try creating an FBO for host-readable textures, since this allows // reading back with glReadPixels instead of glGetTexImage. (Additionally, // GLES does not support glGetTexImage) if (tex->params.host_readable && (can_fbo || p->gles_ver)) need_fbo = true; if (need_fbo) { if (!can_fbo) { PL_ERR(gpu, "Trying to create a renderable/blittable/readable " "texture with an incompatible (non-renderable) format!"); goto error; } glGenFramebuffers(1, &tex_gl->fbo); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); switch (dims) { case 1: glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_1D, tex_gl->texture, 0); break; case 2: glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex_gl->texture, 0); break; case 3: pl_unreachable(); } GLenum err = glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); if (err != GL_FRAMEBUFFER_COMPLETE) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); PL_ERR(gpu, "Failed creating framebuffer: %s", fb_err_str(err)); goto error; } if (params->host_readable && p->gles_ver) { GLint read_type = 0, read_fmt = 0; glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &read_type); glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &read_fmt); if (read_type != tex_gl->type || read_fmt != tex_gl->format) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); PL_ERR(gpu, "Trying to create host_readable texture whose " "implementation-defined pixel read format " "(type=0x%X, fmt=0x%X) does not match the texture's " "internal format (type=0x%X, fmt=0x%X)! 
This is a " "GLES/driver limitation, there's little we can do " "about it.", read_type, read_fmt, tex_gl->type, tex_gl->format); goto error; } } glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); if (!gl_check_err(gpu, "gl_tex_create: fbo")) goto error; } RELEASE_CURRENT(); return tex; error: gl_tex_destroy(gpu, tex); RELEASE_CURRENT(); return NULL; } static bool gl_fb_query(pl_gpu gpu, int fbo, struct pl_fmt *fmt, struct gl_format *glfmt) { struct pl_gl *p = PL_PRIV(gpu); *fmt = (struct pl_fmt) { .name = "fbo", .type = PL_FMT_UNKNOWN, .caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE | PL_FMT_CAP_BLENDABLE, .num_components = 4, .component_depth = {8, 8, 8, 8}, // default to rgba8 .sample_order = {0, 1, 2, 3}, }; *glfmt = (struct gl_format) { .fmt = GL_RGBA, }; bool can_query = gl_test_ext(gpu, "GL_ARB_framebuffer_object", 30, 20); if (!fbo && p->gles_ver && p->gles_ver < 30) can_query = false; // can't query default framebuffer on GLES 2.0 if (can_query) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); GLenum obj = p->gles_ver ? GL_BACK : GL_BACK_LEFT; if (fbo != 0) obj = GL_COLOR_ATTACHMENT0; GLint type = 0; glGetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE, &type); switch (type) { case GL_FLOAT: fmt->type = PL_FMT_FLOAT; break; case GL_INT: fmt->type = PL_FMT_SINT; break; case GL_UNSIGNED_INT: fmt->type = PL_FMT_UINT; break; case GL_SIGNED_NORMALIZED: fmt->type = PL_FMT_SNORM; break; case GL_UNSIGNED_NORMALIZED: fmt->type = PL_FMT_UNORM; break; default: fmt->type = PL_FMT_UNKNOWN; break; } glGetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE, &fmt->component_depth[0]); glGetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &fmt->component_depth[1]); glGetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE, &fmt->component_depth[2]); glGetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE, &fmt->component_depth[3]); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); gl_check_err(gpu, "gl_fb_query"); // Strip missing components from component map while (!fmt->component_depth[fmt->num_components - 1]) fmt->num_components--; } int gpu_bits = 0; for (int i = 0; i < 4; i++) gpu_bits += fmt->component_depth[i]; fmt->internal_size = (gpu_bits + 7) / 8; size_t host_size = 0; switch (fmt->type) { case PL_FMT_UNKNOWN: fmt->opaque = true; return true; case PL_FMT_FLOAT: glfmt->type = GL_FLOAT; host_size = sizeof(float); break; case PL_FMT_UNORM: case PL_FMT_UINT: if (gpu_bits > 32) { glfmt->type = GL_UNSIGNED_SHORT; host_size = sizeof(uint16_t); } else { glfmt->type = GL_UNSIGNED_BYTE; host_size = sizeof(uint8_t); } break; case PL_FMT_SNORM: case PL_FMT_SINT: if (gpu_bits > 32) { glfmt->type = GL_SHORT; host_size = sizeof(int16_t); } else { glfmt->type = GL_BYTE; host_size = sizeof(int8_t); } break; case PL_FMT_TYPE_COUNT: pl_unreachable(); } fmt->texel_size = fmt->num_components * host_size; for (int i = 0; i < fmt->num_components; i++) fmt->host_bits[i] = 8 * host_size; fmt->caps |= PL_FMT_CAP_HOST_READABLE; return true; } pl_tex pl_opengl_wrap(pl_gpu gpu, const struct pl_opengl_wrap_params *params) { if (!MAKE_CURRENT()) return NULL; struct pl_gl *p = PL_PRIV(gpu); struct pl_tex *tex = pl_alloc_obj(NULL, tex, struct pl_tex_gl); struct pl_tex_gl *tex_gl = PL_PRIV(tex); *tex = (struct pl_tex) { .params = { .w = params->width, .h = params->height, .d = params->depth, }, }; pl_fmt 
fmt = NULL; const struct gl_format *glfmt = NULL; if (params->texture) { // Wrapping texture: Require matching iformat pl_assert(params->iformat); for (int i = 0; i < gpu->num_formats; i++) { const struct gl_format **glfmtp = PL_PRIV(gpu->formats[i]); if ((*glfmtp)->ifmt == params->iformat) { fmt = gpu->formats[i]; glfmt = *glfmtp; break; } } if (!fmt) { PL_ERR(gpu, "Failed mapping iformat %d to any equivalent `pl_fmt`", params->iformat); goto error; } } else { // Wrapping framebuffer: Allocate/infer generic FBO format fmt = pl_alloc_obj((void *) gpu, fmt, const struct gl_format *); glfmt = pl_alloc_ptr((void *) fmt, glfmt); const struct gl_format **glfmtp = PL_PRIV(fmt); *glfmtp = glfmt; if (!gl_fb_query(gpu, params->framebuffer, (struct pl_fmt *) fmt, (struct gl_format *) glfmt)) { PL_ERR(gpu, "Failed querying framebuffer specifics!"); pl_free((void *) fmt); goto error; } } *tex_gl = (struct pl_tex_gl) { .target = params->target, .texture = params->texture, .fbo = params->framebuffer, .wrapped_tex = !!params->texture, .wrapped_fb = params->framebuffer || !params->texture, .iformat = glfmt->ifmt, .format = glfmt->fmt, .type = glfmt->type, .fd = -1, }; int dims = pl_tex_params_dimension(tex->params); if (!tex_gl->target) { switch (dims) { case 1: tex_gl->target = GL_TEXTURE_1D; break; case 2: tex_gl->target = GL_TEXTURE_2D; break; case 3: tex_gl->target = GL_TEXTURE_3D; break; } } // Map texture-specific sampling metadata if (params->texture) { switch (params->target) { case GL_TEXTURE_1D: if (params->width || params->depth) { PL_ERR(gpu, "Invalid texture dimensions for GL_TEXTURE_1D"); goto error; } // fall through case GL_TEXTURE_2D: if (params->depth) { PL_ERR(gpu, "Invalid texture dimensions for GL_TEXTURE_2D"); goto error; } // fall through case 0: case GL_TEXTURE_3D: tex->sampler_type = PL_SAMPLER_NORMAL; break; case GL_TEXTURE_RECTANGLE: tex->sampler_type = PL_SAMPLER_RECT; break; case GL_TEXTURE_EXTERNAL_OES: tex->sampler_type = PL_SAMPLER_EXTERNAL; break; default: PL_ERR(gpu, "Failed mapping texture target %u to any equivalent " "`pl_sampler_type`", params->target); goto error; } } // Create optional extra fbo if needed/possible bool can_fbo = tex_gl->texture && (fmt->caps & PL_FMT_CAP_RENDERABLE) && tex->sampler_type != PL_SAMPLER_EXTERNAL && dims < 3; if (can_fbo && !tex_gl->fbo) { glGenFramebuffers(1, &tex_gl->fbo); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); switch (dims) { case 1: glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, tex_gl->target, tex_gl->texture, 0); break; case 2: glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, tex_gl->target, tex_gl->texture, 0); break; } GLenum err = glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); if (err != GL_FRAMEBUFFER_COMPLETE) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); PL_ERR(gpu, "Failed creating framebuffer: error code %d", err); goto error; } if (p->gles_ver) { GLint read_type = 0, read_fmt = 0; glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &read_type); glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &read_fmt); tex->params.host_readable = read_type == tex_gl->type && read_fmt == tex_gl->format; } else { tex->params.host_readable = true; } glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); if (!gl_check_err(gpu, "pl_opengl_wrap: fbo")) goto error; } // Complete the process of inferring the texture capabilities tex->params.format = fmt; if (tex_gl->texture) { tex->params.sampleable = fmt->caps & PL_FMT_CAP_SAMPLEABLE; tex->params.storable = fmt->caps & PL_FMT_CAP_STORABLE; 
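        // Opaque formats have no defined host representation (no texel
        // layout), so they cannot be written to from host memory.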
tex->params.host_writable = !fmt->opaque; tex->params.host_readable |= fmt->caps & PL_FMT_CAP_HOST_READABLE; } if (tex_gl->fbo || tex_gl->wrapped_fb) { tex->params.renderable = fmt->caps & PL_FMT_CAP_RENDERABLE; tex->params.host_readable |= fmt->caps & PL_FMT_CAP_HOST_READABLE; if (dims == 2 && (fmt->caps & PL_FMT_CAP_BLITTABLE)) { tex->params.blit_src = true; tex->params.blit_dst = true; } } tex_gl->barrier = tex_barrier(tex); RELEASE_CURRENT(); return tex; error: gl_tex_destroy(gpu, tex); RELEASE_CURRENT(); return NULL; } unsigned int pl_opengl_unwrap(pl_gpu gpu, pl_tex tex, unsigned int *out_target, int *out_iformat, unsigned int *out_fbo) { struct pl_tex_gl *tex_gl = PL_PRIV(tex); if (!tex_gl->texture) { PL_ERR(gpu, "Trying to call `pl_opengl_unwrap` on a pseudo-texture " "(perhaps obtained by `pl_swapchain_start_frame`?)"); return 0; } if (out_target) *out_target = tex_gl->target; if (out_iformat) *out_iformat = tex_gl->iformat; if (out_fbo) *out_fbo = tex_gl->fbo; return tex_gl->texture; } void gl_tex_invalidate(pl_gpu gpu, pl_tex tex) { struct pl_gl *p = PL_PRIV(gpu); struct pl_tex_gl *tex_gl = PL_PRIV(tex); if (!MAKE_CURRENT()) return; if (tex_gl->texture && p->has_invalidate_tex) glInvalidateTexImage(tex_gl->texture, 0); if ((tex_gl->wrapped_fb || tex_gl->fbo) && p->has_invalidate_fb) { GLenum attachment = tex_gl->fbo ? GL_COLOR_ATTACHMENT0 : GL_COLOR; glBindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); } gl_check_err(gpu, "gl_tex_invalidate"); RELEASE_CURRENT(); } void gl_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color) { if (!MAKE_CURRENT()) return; struct pl_tex_gl *tex_gl = PL_PRIV(tex); pl_assert(tex_gl->fbo || tex_gl->wrapped_fb); switch (tex->params.format->type) { case PL_FMT_UNKNOWN: case PL_FMT_FLOAT: case PL_FMT_UNORM: case PL_FMT_SNORM: glClearColor(color.f[0], color.f[1], color.f[2], color.f[3]); break; case PL_FMT_UINT: glClearColorIuiEXT(color.u[0], color.u[1], color.u[2], color.u[3]); break; case PL_FMT_SINT: glClearColorIiEXT(color.i[0], color.i[1], color.i[2], color.i[3]); break; case PL_FMT_TYPE_COUNT: pl_unreachable(); } glBindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); glClear(GL_COLOR_BUFFER_BIT); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); gl_check_err(gpu, "gl_tex_clear"); RELEASE_CURRENT(); } void gl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) { if (!MAKE_CURRENT()) return; struct pl_tex_gl *src_gl = PL_PRIV(params->src); struct pl_tex_gl *dst_gl = PL_PRIV(params->dst); pl_assert(src_gl->fbo || src_gl->wrapped_fb); pl_assert(dst_gl->fbo || dst_gl->wrapped_fb); glBindFramebuffer(GL_READ_FRAMEBUFFER, src_gl->fbo); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_gl->fbo); static const GLint filters[PL_TEX_SAMPLE_MODE_COUNT] = { [PL_TEX_SAMPLE_NEAREST] = GL_NEAREST, [PL_TEX_SAMPLE_LINEAR] = GL_LINEAR, }; struct pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc; glBlitFramebuffer(src_rc.x0, src_rc.y0, src_rc.x1, src_rc.y1, dst_rc.x0, dst_rc.y0, dst_rc.x1, dst_rc.y1, GL_COLOR_BUFFER_BIT, filters[params->sample_mode]); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); gl_check_err(gpu, "gl_tex_blit"); RELEASE_CURRENT(); } static int get_alignment(size_t pitch) { if (pitch % 8 == 0) return 8; if (pitch % 4 == 0) return 4; if (pitch % 2 == 0) return 2; return 1; } bool gl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_gl *p = PL_PRIV(gpu); pl_tex 
tex = params->tex; pl_fmt fmt = tex->params.format; pl_buf buf = params->buf; struct pl_tex_gl *tex_gl = PL_PRIV(tex); struct pl_buf_gl *buf_gl = buf ? PL_PRIV(buf) : NULL; // If the user requests asynchronous uploads, it's more efficient to do // them via a PBO - this allows us to skip blocking the caller, especially // when the host pointer can be imported directly. if (params->callback && !buf) { size_t buf_size = pl_tex_transfer_size(params); const size_t min_size = 32*1024; // 32 KiB if (buf_size >= min_size && buf_size <= gpu->limits.max_buf_size) return pl_tex_upload_pbo(gpu, params); } if (!MAKE_CURRENT()) return false; uintptr_t src = (uintptr_t) params->ptr; if (buf) { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->buffer); src = buf_gl->offset + params->buf_offset; } bool misaligned = params->row_pitch % fmt->texel_size; int stride_w = params->row_pitch / fmt->texel_size; int stride_h = params->depth_pitch / params->row_pitch; int dims = pl_tex_params_dimension(tex->params); if (dims > 1) glPixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(params->row_pitch)); int rows = pl_rect_h(params->rc); if (stride_w != pl_rect_w(params->rc) || misaligned) { if (p->has_stride && !misaligned) { glPixelStorei(GL_UNPACK_ROW_LENGTH, stride_w); } else { rows = 1; } } int imgs = pl_rect_d(params->rc); if (stride_h != pl_rect_h(params->rc) || rows < stride_h) { if (p->has_unpack_image_height) { glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, stride_h); } else { imgs = 1; } } glBindTexture(tex_gl->target, tex_gl->texture); gl_timer_begin(params->timer); switch (dims) { case 1: glTexSubImage1D(tex_gl->target, 0, params->rc.x0, pl_rect_w(params->rc), tex_gl->format, tex_gl->type, (void *) src); break; case 2: for (int y = params->rc.y0; y < params->rc.y1; y += rows) { glTexSubImage2D(tex_gl->target, 0, params->rc.x0, y, pl_rect_w(params->rc), rows, tex_gl->format, tex_gl->type, (void *) src); src += params->row_pitch * rows; } break; case 3: for (int z = params->rc.z0; z < params->rc.z1; z += imgs) { uintptr_t row_src = src; for (int y = params->rc.y0; y < params->rc.y1; y += rows) { glTexSubImage3D(tex_gl->target, 0, params->rc.x0, y, z, pl_rect_w(params->rc), rows, imgs, tex_gl->format, tex_gl->type, (void *) row_src); row_src = (uintptr_t) row_src + params->row_pitch * rows; } src += params->depth_pitch * imgs; } break; } gl_timer_end(params->timer); glBindTexture(tex_gl->target, 0); glPixelStorei(GL_UNPACK_ALIGNMENT, 4); if (p->has_stride) glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); if (p->has_unpack_image_height) glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0); if (buf) { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); if (buf->params.host_mapped) { // Make sure the PBO is not reused until GL is done with it. If a // previous operation is pending, "update" it by creating a new // fence that will cover the previous operation as well. glDeleteSync(buf_gl->fence); buf_gl->fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } } if (params->callback) { PL_ARRAY_APPEND(gpu, p->callbacks, (struct gl_cb) { .sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0), .callback = params->callback, .priv = params->priv, }); } bool ok = gl_check_err(gpu, "gl_tex_upload"); RELEASE_CURRENT(); return ok; } bool gl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_gl *p = PL_PRIV(gpu); pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; pl_buf buf = params->buf; struct pl_tex_gl *tex_gl = PL_PRIV(tex); struct pl_buf_gl *buf_gl = buf ? 
PL_PRIV(buf) : NULL; bool ok = true; if (params->callback && !buf) { size_t buf_size = pl_tex_transfer_size(params); const size_t min_size = 32*1024; // 32 KiB if (buf_size >= min_size && buf_size <= gpu->limits.max_buf_size) return pl_tex_download_pbo(gpu, params); } if (!MAKE_CURRENT()) return false; uintptr_t dst = (uintptr_t) params->ptr; if (buf) { glBindBuffer(GL_PIXEL_PACK_BUFFER, buf_gl->buffer); dst = buf_gl->offset + params->buf_offset; } struct pl_rect3d full = { 0, 0, 0, tex->params.w, PL_DEF(tex->params.h, 1), PL_DEF(tex->params.d, 1), }; bool misaligned = params->row_pitch % fmt->texel_size; int stride_w = params->row_pitch / fmt->texel_size; int stride_h = params->depth_pitch / params->row_pitch; int dims = pl_tex_params_dimension(tex->params); bool is_copy = pl_rect3d_eq(params->rc, full) && stride_w == tex->params.w && stride_h == PL_DEF(tex->params.h, 1) && !misaligned; gl_timer_begin(params->timer); if (tex_gl->fbo || tex_gl->wrapped_fb) { // We can use a more efficient path when we have an FBO available if (dims > 1) glPixelStorei(GL_PACK_ALIGNMENT, get_alignment(params->row_pitch)); int rows = pl_rect_h(params->rc); if (stride_w != tex->params.w || misaligned) { if (p->has_stride && !misaligned) { glPixelStorei(GL_PACK_ROW_LENGTH, stride_w); } else { rows = 1; } } // No 3D framebuffers pl_assert(pl_rect_d(params->rc) == 1); glBindFramebuffer(GL_READ_FRAMEBUFFER, tex_gl->fbo); for (int y = params->rc.y0; y < params->rc.y1; y += rows) { glReadPixels(params->rc.x0, y, pl_rect_w(params->rc), rows, tex_gl->format, tex_gl->type, (void *) dst); dst += params->row_pitch * rows; } glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); glPixelStorei(GL_PACK_ALIGNMENT, 4); if (p->has_stride) glPixelStorei(GL_PACK_ROW_LENGTH, 0); } else if (is_copy) { // We're downloading the entire texture glBindTexture(tex_gl->target, tex_gl->texture); glGetTexImage(tex_gl->target, 0, tex_gl->format, tex_gl->type, (void *) dst); glBindTexture(tex_gl->target, 0); } else { PL_ERR(gpu, "Partial downloads of 3D textures not implemented!"); ok = false; } gl_timer_end(params->timer); if (buf) { glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); if (ok && buf->params.host_mapped) { glDeleteSync(buf_gl->fence); buf_gl->fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } } if (params->callback) { PL_ARRAY_APPEND(gpu, p->callbacks, (struct gl_cb) { .sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0), .callback = params->callback, .priv = params->priv, }); } ok &= gl_check_err(gpu, "gl_tex_download"); RELEASE_CURRENT(); return ok; } libplacebo-v4.192.1/src/opengl/swapchain.c000066400000000000000000000170711417677245700204000ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "formats.h" #include "gpu.h" #include "swapchain.h" #include "utils.h" #include "pl_thread.h" struct priv { struct pl_opengl_swapchain_params params; pl_opengl gl; pl_mutex lock; bool has_sync; // current parameters pl_tex fb; bool frame_started; // vsync fences int swapchain_depth; PL_ARRAY(GLsync) vsync_fences; }; static struct pl_sw_fns opengl_swapchain; pl_swapchain pl_opengl_create_swapchain(pl_opengl gl, const struct pl_opengl_swapchain_params *params) { pl_gpu gpu = gl->gpu; if (params->max_swapchain_depth < 0) { PL_ERR(gpu, "Tried specifying negative swapchain depth?"); return NULL; } if (!gl_make_current(gl)) return NULL; struct pl_swapchain *sw = pl_zalloc_obj(NULL, sw, struct priv); sw->impl = &opengl_swapchain; sw->log = gpu->log; sw->ctx = sw->log; sw->gpu = gpu; struct priv *p = PL_PRIV(sw); pl_mutex_init(&p->lock); p->params = *params; p->has_sync = epoxy_has_gl_extension("GL_ARB_sync"); p->gl = gl; gl_release_current(gl); return sw; } static void gl_sw_destroy(pl_swapchain sw) { pl_gpu gpu = sw->gpu; struct priv *p = PL_PRIV(sw); pl_gpu_flush(gpu); pl_tex_destroy(gpu, &p->fb); pl_mutex_destroy(&p->lock); pl_free((void *) sw); } static int gl_sw_latency(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); return p->params.max_swapchain_depth; } static bool gl_sw_resize(pl_swapchain sw, int *width, int *height) { struct priv *p = PL_PRIV(sw); const int w = *width, h = *height; pl_mutex_lock(&p->lock); if (p->fb && w == p->fb->params.w && h == p->fb->params.h) { pl_mutex_unlock(&p->lock); return true; } if (p->frame_started && (w || h)) { PL_ERR(sw, "Tried resizing the swapchain while a frame was in progress! " "Please submit the current frame first."); pl_mutex_unlock(&p->lock); return false; } if (w && h) { pl_tex_destroy(sw->gpu, &p->fb); p->fb = pl_opengl_wrap(sw->gpu, pl_opengl_wrap_params( .framebuffer = p->params.framebuffer.id, .width = w, .height = h, )); if (!p->fb) { PL_ERR(sw, "Failed wrapping OpenGL framebuffer!"); pl_mutex_unlock(&p->lock); return false; } } if (!p->fb) { PL_ERR(sw, "Tried calling `pl_swapchain_resize` with unknown size! " "This is forbidden for OpenGL. The first call to " "`pl_swapchain_resize` must include the width and height of the " "swapchain, because there's no way to figure this out from " "within the API."); pl_mutex_unlock(&p->lock); return false; } *width = p->fb->params.w; *height = p->fb->params.h; pl_mutex_unlock(&p->lock); return true; } void pl_opengl_swapchain_update_fb(pl_swapchain sw, const struct pl_opengl_framebuffer *fb) { struct priv *p = PL_PRIV(sw); pl_mutex_lock(&p->lock); if (p->frame_started) { PL_ERR(sw,"Tried calling `pl_opengl_swapchain_update_fb` while a frame " "was in progress! Please submit the current frame first."); pl_mutex_unlock(&p->lock); return; } if (p->params.framebuffer.id != fb->id) pl_tex_destroy(sw->gpu, &p->fb); p->params.framebuffer = *fb; pl_mutex_unlock(&p->lock); } static bool gl_sw_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame) { struct priv *p = PL_PRIV(sw); pl_mutex_lock(&p->lock); bool ok = false; if (!p->fb) { PL_ERR(sw, "Unknown framebuffer size. Please call `pl_swapchain_resize` " "before `pl_swapchain_start_frame` for OpenGL swapchains!"); goto error; } if (p->frame_started) { PL_ERR(sw, "Attempted calling `pl_swapchain_start` while a frame was " "already in progress! 
Call `pl_swapchain_submit_frame` first."); goto error; } if (!gl_make_current(p->gl)) goto error; *out_frame = (struct pl_swapchain_frame) { .fbo = p->fb, .flipped = !p->params.framebuffer.flipped, .color_repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, .alpha = p->fb->params.format->num_components == 4 ? PL_ALPHA_PREMULTIPLIED : PL_ALPHA_UNKNOWN, .bits = { // Just use the red channel in the absence of anything more // sane to do, because the red channel is both guaranteed to // exist and also typically has the minimum number of bits // (which is arguably what matters for dithering) .sample_depth = p->fb->params.format->component_depth[0], .color_depth = p->fb->params.format->component_depth[0], }, }, .color_space = pl_color_space_monitor, }; p->frame_started = gl_check_err(sw->gpu, "gl_sw_start_frame"); if (!p->frame_started) goto error; // keep lock held return true; error: gl_release_current(p->gl); pl_mutex_unlock(&p->lock); return ok; } static bool gl_sw_submit_frame(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); pl_assert(p->frame_started); if (p->has_sync && p->params.max_swapchain_depth) { GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); if (fence) PL_ARRAY_APPEND(sw, p->vsync_fences, fence); } p->frame_started = false; bool ok = gl_check_err(sw->gpu, "gl_sw_submit_frame"); gl_release_current(p->gl); pl_mutex_unlock(&p->lock); return ok; } static void gl_sw_swap_buffers(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); if (!p->params.swap_buffers) { PL_ERR(sw, "`pl_swapchain_swap_buffers` called but no " "`params.swap_buffers` callback set!"); return; } pl_mutex_lock(&p->lock); if (!gl_make_current(p->gl)) { pl_mutex_unlock(&p->lock); return; } p->params.swap_buffers(p->params.priv); const int max_depth = p->params.max_swapchain_depth; while (max_depth && p->vsync_fences.num >= max_depth) { glClientWaitSync(p->vsync_fences.elem[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9); glDeleteSync(p->vsync_fences.elem[0]); PL_ARRAY_REMOVE_AT(p->vsync_fences, 0); } gl_check_err(sw->gpu, "gl_sw_swap_buffers"); gl_release_current(p->gl); pl_mutex_unlock(&p->lock); } static struct pl_sw_fns opengl_swapchain = { .destroy = gl_sw_destroy, .latency = gl_sw_latency, .resize = gl_sw_resize, .start_frame = gl_sw_start_frame, .submit_frame = gl_sw_submit_frame, .swap_buffers = gl_sw_swap_buffers, }; libplacebo-v4.192.1/src/opengl/utils.c000066400000000000000000000077441417677245700175710ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "gpu.h" #include "utils.h" const char *gl_err_str(GLenum err) { switch (err) { #define CASE(name) case name: return #name CASE(GL_NO_ERROR); CASE(GL_INVALID_ENUM); CASE(GL_INVALID_VALUE); CASE(GL_INVALID_OPERATION); CASE(GL_INVALID_FRAMEBUFFER_OPERATION); CASE(GL_OUT_OF_MEMORY); CASE(GL_STACK_UNDERFLOW); CASE(GL_STACK_OVERFLOW); #undef CASE default: return "unknown error"; } } void gl_poll_callbacks(pl_gpu gpu) { struct pl_gl *gl = PL_PRIV(gpu); while (gl->callbacks.num) { struct gl_cb cb = gl->callbacks.elem[0]; GLenum res = glClientWaitSync(cb.sync, 0, 0); switch (res) { case GL_ALREADY_SIGNALED: case GL_CONDITION_SATISFIED: PL_ARRAY_REMOVE_AT(gl->callbacks, 0); cb.callback(cb.priv); continue; case GL_WAIT_FAILED: PL_ARRAY_REMOVE_AT(gl->callbacks, 0); glDeleteSync(cb.sync); gl->failed = true; gl_check_err(gpu, "gl_poll_callbacks"); // NOTE: will recurse! return; case GL_TIMEOUT_EXPIRED: return; default: pl_unreachable(); } } } bool gl_check_err(pl_gpu gpu, const char *fun) { struct pl_gl *gl = PL_PRIV(gpu); bool ret = true; while (true) { GLenum error = glGetError(); if (error == GL_NO_ERROR) break; PL_ERR(gpu, "%s: OpenGL error: %s", fun, gl_err_str(error)); ret = false; gl->failed = true; } gl_poll_callbacks(gpu); return ret; } bool gl_is_software(void) { const char *renderer = (char *) glGetString(GL_RENDERER); const char *vendor = (char *) glGetString(GL_VENDOR); return !(renderer && vendor) || strcmp(renderer, "Software Rasterizer") == 0 || strstr(renderer, "llvmpipe") || strstr(renderer, "softpipe") || strcmp(vendor, "Microsoft Corporation") == 0 || strcmp(renderer, "Mesa X11") == 0 || strcmp(renderer, "Apple Software Renderer") == 0; } bool gl_test_ext(pl_gpu gpu, const char *ext, int gl_ver, int gles_ver) { struct pl_gl *p = PL_PRIV(gpu); if (gl_ver && p->gl_ver >= gl_ver) return true; if (gles_ver && p->gles_ver >= gles_ver) return true; return ext ? epoxy_has_gl_extension(ext) : false; } #ifdef EPOXY_HAS_EGL const char *egl_err_str(EGLenum err) { switch (err) { #define CASE(name) case name: return #name CASE(EGL_SUCCESS); CASE(EGL_NOT_INITIALIZED); CASE(EGL_BAD_ACCESS); CASE(EGL_BAD_ALLOC); CASE(EGL_BAD_ATTRIBUTE); CASE(EGL_BAD_CONFIG); CASE(EGL_BAD_CONTEXT); CASE(EGL_BAD_CURRENT_SURFACE); CASE(EGL_BAD_DISPLAY); CASE(EGL_BAD_MATCH); CASE(EGL_BAD_NATIVE_PIXMAP); CASE(EGL_BAD_NATIVE_WINDOW); CASE(EGL_BAD_PARAMETER); CASE(EGL_BAD_SURFACE); #undef CASE default: return "unknown error"; } } bool egl_check_err(pl_gpu gpu, const char *fun) { struct pl_gl *gl = PL_PRIV(gpu); bool ret = true; while (true) { GLenum error = eglGetError(); if (error == EGL_SUCCESS) return ret; PL_ERR(gpu, "%s: EGL error: %s", fun, egl_err_str(error)); ret = false; gl->failed = true; } } #endif // EPOXY_HAS_EGL libplacebo-v4.192.1/src/opengl/utils.h000066400000000000000000000031421417677245700175620ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include "common.h" // Iterate through callbacks attached to the `pl_gl` and execute all of the // ones that have completed. // // Thread-safety: Unsafe void gl_poll_callbacks(pl_gpu gpu); // Return a human-readable name for various OpenGL errors // // Thread-safety: Safe const char *gl_err_str(GLenum err); // Check for errors and log them + return false if detected // // Thread-safety: Unsafe bool gl_check_err(pl_gpu gpu, const char *fun); // Returns true if the context is a suspected software rasterizer // // Thread-safety: Unsafe bool gl_is_software(void); // Check for presence of an extension, alternatively a minimum GL version // // Thread-safety: Unsafe bool gl_test_ext(pl_gpu gpu, const char *ext, int gl_ver, int gles_ver); #ifdef EPOXY_HAS_EGL // Thread-safety: Safe const char *egl_err_str(EGLenum err); // Thread-safety: Unsafe bool egl_check_err(pl_gpu gpu, const char *fun); #endif libplacebo-v4.192.1/src/pl_alloc.c000066400000000000000000000165741417677245700167330ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" struct header { #ifndef NDEBUG #define MAGIC 0x20210119LU uint32_t magic; #endif size_t size; struct header *parent; struct ext *ext; // Pointer to actual data, for alignment purposes intmax_t data[1]; }; // Lazily allocated, to save space for leaf allocations and allocations which // don't need fancy requirements struct ext { size_t num_children; size_t children_size; // total allocated size of `children` struct header *children[]; }; #define PTR_OFFSET offsetof(struct header, data) #define MAX_ALLOC (SIZE_MAX - PTR_OFFSET) #define MINIMUM_CHILDREN 4 static inline struct header *get_header(void *ptr) { if (!ptr) return NULL; struct header *hdr = (struct header *) ((uintptr_t) ptr - PTR_OFFSET); #ifndef NDEBUG assert(hdr->magic == MAGIC); #endif return hdr; } static inline void *oom(void) { fprintf(stderr, "out of memory\n"); abort(); } static inline struct ext *alloc_ext(struct header *h) { if (!h) return NULL; if (!h->ext) { h->ext = malloc(sizeof(struct ext) + MINIMUM_CHILDREN * sizeof(void *)); if (!h->ext) oom(); h->ext->num_children = 0; h->ext->children_size = MINIMUM_CHILDREN; } return h->ext; } static inline void attach_child(struct header *parent, struct header *child) { child->parent = parent; if (!parent) return; struct ext *ext = alloc_ext(parent); if (ext->num_children == ext->children_size) { size_t new_size = ext->children_size * 2; ext = realloc(ext, sizeof(struct ext) + new_size * sizeof(void *)); if (!ext) oom(); ext->children_size = new_size; parent->ext = ext; } ext->children[ext->num_children++] = child; } static inline void unlink_child(struct header *parent, struct header *child) { child->parent = NULL; if (!parent) return; struct ext *ext = parent->ext; for (size_t i = 0; i < ext->num_children; i++) { if (ext->children[i] == child) { memmove(&ext->children[i], &ext->children[i + 1], 
(--ext->num_children - i) * sizeof(ext->children[0])); return; } } assert(!"unlinking orphaned child?"); } void *pl_alloc(void *parent, size_t size) { if (size >= MAX_ALLOC) return oom(); struct header *h = malloc(PTR_OFFSET + size); if (!h) return oom(); #ifndef NDEBUG h->magic = MAGIC; #endif h->size = size; h->ext = NULL; attach_child(get_header(parent), h); return h->data; } void *pl_zalloc(void *parent, size_t size) { if (size >= MAX_ALLOC) return oom(); struct header *h = calloc(1, PTR_OFFSET + size); if (!h) return oom(); #ifndef NDEBUG h->magic = MAGIC; #endif h->size = size; attach_child(get_header(parent), h); return h->data; } void *pl_realloc(void *parent, void *ptr, size_t size) { if (size >= MAX_ALLOC) return oom(); if (!ptr) return pl_alloc(parent, size); struct header *h = get_header(ptr); assert(get_header(parent) == h->parent); if (h->size == size) return ptr; struct header *old_h = h; h = realloc(h, PTR_OFFSET + size); if (!h) return oom(); h->size = size; if (h != old_h) { if (h->parent) { struct ext *ext = h->parent->ext; for (size_t i = 0; i < ext->num_children; i++) { if (ext->children[i] == old_h) { ext->children[i] = h; goto done_reparenting; } } assert(!"reallocating orphaned child?"); } done_reparenting: if (h->ext) { for (size_t i = 0; i < h->ext->num_children; i++) h->ext->children[i]->parent = h; } } return h->data; } void pl_free(void *ptr) { struct header *h = get_header(ptr); if (!h) return; pl_free_children(ptr); unlink_child(h->parent, h); free(h->ext); free(h); } void pl_free_children(void *ptr) { struct header *h = get_header(ptr); if (!h || !h->ext) return; #ifndef NDEBUG // this detects recursive hierarchies h->magic = 0; #endif for (size_t i = 0; i < h->ext->num_children; i++) { h->ext->children[i]->parent = NULL; // prevent recursive access pl_free(h->ext->children[i]->data); } #ifndef NDEBUG h->magic = MAGIC; #endif } size_t pl_get_size(void *ptr) { struct header *h = get_header(ptr); return h ? h->size : 0; } void *pl_steal(void *parent, void *ptr) { struct header *h = get_header(ptr); if (!h) return NULL; struct header *new_par = get_header(parent); if (new_par != h->parent) { unlink_child(h->parent, h); attach_child(new_par, h); } return h->data; } void *pl_memdup(void *parent, const void *ptr, size_t size) { if (!size) return NULL; void *new = pl_alloc(parent, size); if (!new) return oom(); assert(ptr); memcpy(new, ptr, size); return new; } char *pl_str0dup0(void *parent, const char *str) { if (!str) return NULL; return pl_memdup(parent, str, strlen(str) + 1); } char *pl_strndup0(void *parent, const char *str, size_t size) { if (!str) return NULL; size_t str_size = strnlen(str, size); char *new = pl_alloc(parent, str_size + 1); if (!new) return oom(); memcpy(new, str, str_size); new[str_size] = '\0'; return new; } struct pl_ref { pl_rc_t rc; }; struct pl_ref *pl_ref_new(void *parent) { struct pl_ref *ref = pl_zalloc_ptr(parent, ref); if (!ref) return oom(); pl_rc_init(&ref->rc); return ref; } struct pl_ref *pl_ref_dup(struct pl_ref *ref) { if (!ref) return NULL; pl_rc_ref(&ref->rc); return ref; } void pl_ref_deref(struct pl_ref **refp) { struct pl_ref *ref = *refp; if (!ref) return; if (pl_rc_deref(&ref->rc)) { pl_free(ref); *refp = NULL; } } char *pl_asprintf(void *parent, const char *fmt, ...) 
{ char *str; va_list ap; va_start(ap, fmt); str = pl_vasprintf(parent, fmt, ap); va_end(ap); return str; } char *pl_vasprintf(void *parent, const char *fmt, va_list ap) { // First, we need to determine the size that will be required for // printing the entire string. Do this by making a copy of the va_list // and printing it to a null buffer. va_list copy; va_copy(copy, ap); int size = vsnprintf(NULL, 0, fmt, copy); va_end(copy); if (size < 0) return NULL; char *str = pl_alloc(parent, size + 1); vsnprintf(str, size + 1, fmt, ap); return str; } libplacebo-v4.192.1/src/pl_alloc.h000066400000000000000000000213331417677245700167250ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include #include #include // Unlike standard malloc, `size` may be 0, in which case this returns an empty // allocation which can still be used as a parent for other allocations. void *pl_alloc(void *parent, size_t size); void *pl_zalloc(void *parent, size_t size); void *pl_realloc(void *parent, void *ptr, size_t size); static inline void *pl_calloc(void *parent, size_t count, size_t size) { return pl_zalloc(parent, count * size); } #define pl_tmp(parent) pl_alloc(parent, 0) // Variants of the above which resolve to sizeof(*ptr) #define pl_alloc_ptr(parent, ptr) \ (__typeof__(ptr)) pl_alloc(parent, sizeof(*(ptr))) #define pl_zalloc_ptr(parent, ptr) \ (__typeof__(ptr)) pl_zalloc(parent, sizeof(*(ptr))) #define pl_calloc_ptr(parent, num, ptr) \ (__typeof__(ptr)) pl_calloc(parent, num, sizeof(*(ptr))) // Helper function to allocate a struct and immediately assign it #define pl_alloc_struct(parent, type, ...) \ (type *) pl_memdup(parent, &(type) __VA_ARGS__, sizeof(type)) // Free an allocation and its children (recursively) void pl_free(void *ptr); void pl_free_children(void *ptr); #define pl_free_ptr(ptr) \ do { \ pl_free(*(ptr)); \ *(ptr) = NULL; \ } while (0) // Get the current size of an allocation. size_t pl_get_size(void *ptr); #define pl_grow(parent, ptr, size) \ do { \ size_t _size = (size); \ if (_size > pl_get_size(*(ptr))) \ *(ptr) = pl_realloc(parent, *(ptr), _size); \ } while (0) // Reparent an allocation onto a new parent void *pl_steal(void *parent, void *ptr); // Wrapper functions around common string utilities void *pl_memdup(void *parent, const void *ptr, size_t size); char *pl_str0dup0(void *parent, const char *str); char *pl_strndup0(void *parent, const char *str, size_t size); #define pl_memdup_ptr(parent, ptr) \ (__typeof__(ptr)) pl_memdup(parent, ptr, sizeof(*(ptr))) // Helper functions for allocating public/private pairs, done by allocating // `priv` at the address of `pub` + sizeof(pub), rounded up to the maximum // alignment requirements. 
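//
// Purely as an illustrative sketch (hedged; `struct pub_type` and
// `struct priv_type` are hypothetical names, not part of this header),
// a public/private pair is typically allocated and recovered like this:
//
//     struct pub_type *pub = pl_zalloc_obj(parent, pub, struct priv_type);
//     struct priv_type *priv = PL_PRIV(pub);
//
// PL_PRIV() re-derives the private address from the public pointer, so no
// extra bookkeeping pointer needs to be stored alongside the object.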
#define PL_ALIGN_MEM(size) \ (((size) + alignof(max_align_t) - 1) & ~(alignof(max_align_t) - 1)) #define PL_PRIV(pub) \ (void *) ((uintptr_t) (pub) + PL_ALIGN_MEM(sizeof(*(pub)))) #define pl_alloc_obj(parent, ptr, priv) \ (__typeof__(ptr)) pl_alloc(parent, PL_ALIGN_MEM(sizeof(*(ptr))) + sizeof(priv)) #define pl_zalloc_obj(parent, ptr, priv) \ (__typeof__(ptr)) pl_zalloc(parent, PL_ALIGN_MEM(sizeof(*(ptr))) + sizeof(priv)) // Refcounting helper struct pl_ref; // pl_ref_deref will free the ref and all of its children as soon as the // internal refcount reaches 0 struct pl_ref *pl_ref_new(void *parent); struct pl_ref *pl_ref_dup(struct pl_ref *ref); void pl_ref_deref(struct pl_ref **ref); // Helper functions for dealing with arrays #define PL_ARRAY(type) struct { type *elem; int num; } #define PL_ARRAY_RESIZE(parent, arr, len) \ do { \ size_t _new_size = (len) * sizeof((arr).elem[0]); \ (arr).elem = pl_realloc((void *) parent, (arr).elem, _new_size); \ } while (0) #define PL_ARRAY_MEMDUP(parent, arr, ptr, len) \ do { \ size_t _len = (len); \ PL_ARRAY_RESIZE(parent, arr, _len); \ memcpy((arr).elem, ptr, _len * sizeof((arr).elem[0])); \ (arr).num = _len; \ } while (0) #define PL_ARRAY_GROW(parent, arr) \ do { \ size_t _avail = pl_get_size((arr).elem) / sizeof((arr).elem[0]); \ if (_avail < 10) { \ PL_ARRAY_RESIZE(parent, arr, 10); \ } else if ((arr).num == _avail) { \ PL_ARRAY_RESIZE(parent, arr, (arr).num * 1.5); \ } else { \ assert((arr).elem); \ } \ } while (0) #define PL_ARRAY_APPEND(parent, arr, ...) \ do { \ PL_ARRAY_GROW(parent, arr); \ (arr).elem[(arr).num++] = __VA_ARGS__; \ } while (0) #define PL_ARRAY_CONCAT(parent, to, from) \ do { \ if ((from).num) { \ PL_ARRAY_RESIZE(parent, to, (to).num + (from).num); \ memmove(&(to).elem[(to).num], (from).elem, \ (from).num * sizeof((from).elem[0])); \ (to).num += (from).num; \ } \ } while (0) #define PL_ARRAY_REMOVE_RANGE(arr, idx, count) \ do { \ size_t _idx = (idx); \ size_t _count = (count); \ assert(_idx + _count <= (arr).num); \ memmove(&(arr).elem[_idx], &(arr).elem[_idx + _count], \ ((arr).num - _idx - _count) * sizeof((arr).elem[0])); \ (arr).num -= _count; \ } while (0) #define PL_ARRAY_REMOVE_AT(arr, idx) PL_ARRAY_REMOVE_RANGE(arr, idx, 1) #define PL_ARRAY_INSERT_AT(parent, arr, idx, ...) \ do { \ size_t _idx = (idx); \ assert(_idx <= (arr).num); \ PL_ARRAY_GROW(parent, arr); \ memmove(&(arr).elem[_idx + 1], &(arr).elem[_idx], \ ((arr).num++ - _idx) * sizeof((arr).elem[0])); \ (arr).elem[_idx] = __VA_ARGS__; \ } while (0) // Returns whether or not there was any element to pop #define PL_ARRAY_POP(arr, out) \ ((arr).num > 0 \ ? (*(out) = (arr).elem[--(arr).num], true) \ : false \ ) // Wrapper for dealing with non-PL_ARRAY arrays #define PL_ARRAY_APPEND_RAW(parent, arr, idxvar, ...) \ do { \ PL_ARRAY(__typeof__((arr)[0])) _arr = { (arr), (idxvar) }; \ PL_ARRAY_APPEND(parent, _arr, __VA_ARGS__); \ (arr) = _arr.elem; \ (idxvar) = _arr.num; \ } while (0) libplacebo-v4.192.1/src/pl_assert.h000066400000000000000000000025761417677245700171440ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #ifndef NDEBUG # define pl_assert assert #else # define pl_assert(expr) \ do { \ if (!(expr)) { \ fprintf(stderr, "Assertion failed: %s in %s:%d\n", \ #expr, __FILE__, __LINE__); \ abort(); \ } \ } while (0) #endif // In C11, static asserts must have a string message #define pl_static_assert(expr) static_assert(expr, #expr) libplacebo-v4.192.1/src/pl_string.c000066400000000000000000000124501417677245700171340ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" static void grow_str(void *alloc, pl_str *str, size_t len) { // Like pl_grow, but with some extra headroom if (len > pl_get_size(str->buf)) str->buf = pl_realloc(alloc, str->buf, len * 1.5); } void pl_str_append(void *alloc, pl_str *str, pl_str append) { if (!append.len) return; // Also append an extra \0 for convenience, since a lot of the time // this function will be used to generate a string buffer grow_str(alloc, str, str->len + append.len + 1); memcpy(str->buf + str->len, append.buf, append.len); str->len += append.len; str->buf[str->len] = '\0'; } void pl_str_append_asprintf(void *alloc, pl_str *str, const char *fmt, ...) { va_list ap; va_start(ap, fmt); pl_str_append_vasprintf(alloc, str, fmt, ap); va_end(ap); } void pl_str_append_vasprintf(void *alloc, pl_str *str, const char *fmt, va_list ap) { // First, we need to determine the size that will be required for // printing the entire string. Do this by making a copy of the va_list // and printing it to a null buffer. va_list copy; va_copy(copy, ap); int size = vsnprintf(NULL, 0, fmt, copy); va_end(copy); if (size < 0) return; // Make room in `str` and format to there directly grow_str(alloc, str, str->len + size + 1); str->len += vsnprintf((char *) (str->buf + str->len), size + 1, fmt, ap); } int pl_str_sscanf(pl_str str, const char *fmt, ...) 
{ char *tmp = pl_strdup0(NULL, str); va_list va; va_start(va, fmt); int ret = vsscanf(tmp, fmt, va); va_end(va); pl_free(tmp); return ret; } int pl_strchr(pl_str str, int c) { if (!str.len) return -1; void *pos = memchr(str.buf, c, str.len); if (pos) return (intptr_t) pos - (intptr_t) str.buf; return -1; } size_t pl_strspn(pl_str str, const char *accept) { for (size_t i = 0; i < str.len; i++) { if (!strchr(accept, str.buf[i])) return i; } return str.len; } size_t pl_strcspn(pl_str str, const char *reject) { for (size_t i = 0; i < str.len; i++) { if (strchr(reject, str.buf[i])) return i; } return str.len; } static inline bool pl_isspace(char c) { switch (c) { case ' ': case '\n': case '\r': case '\t': case '\v': case '\f': return true; default: return false; } } pl_str pl_str_strip(pl_str str) { while (str.len && pl_isspace(str.buf[0])) { str.buf++; str.len--; } while (str.len && pl_isspace(str.buf[str.len - 1])) str.len--; return str; } int pl_str_find(pl_str haystack, pl_str needle) { if (!needle.len) return 0; for (size_t i = 0; i + needle.len <= haystack.len; i++) { if (memcmp(&haystack.buf[i], needle.buf, needle.len) == 0) return i; } return -1; } pl_str pl_str_split_char(pl_str str, char sep, pl_str *out_rest) { int pos = pl_strchr(str, sep); if (pos < 0) { if (out_rest) *out_rest = (pl_str) {0}; return str; } else { if (out_rest) *out_rest = pl_str_drop(str, pos + 1); return pl_str_take(str, pos); } } pl_str pl_str_split_str(pl_str str, pl_str sep, pl_str *out_rest) { int pos = pl_str_find(str, sep); if (pos < 0) { if (out_rest) *out_rest = (pl_str) {0}; return str; } else { if (out_rest) *out_rest = pl_str_drop(str, pos + sep.len); return pl_str_take(str, pos); } } static bool get_hexdigit(pl_str *str, int *digit) { while (str->len && pl_isspace(str->buf[0])) { str->buf++; str->len--; } if (!str->len) { *digit = -1; // EOF return true; } char c = str->buf[0]; str->buf++; str->len--; if (c >= '0' && c <= '9') { *digit = c - '0'; } else if (c >= 'a' && c <= 'f') { *digit = c - 'a' + 10; } else if (c >= 'A' && c <= 'F') { *digit = c - 'A' + 10; } else { return false; // invalid char } return true; } bool pl_str_decode_hex(void *alloc, pl_str hex, pl_str *out) { if (!out) return false; uint8_t *buf = pl_alloc(alloc, hex.len / 2); int len = 0; while (hex.len) { int a, b; if (!get_hexdigit(&hex, &a) || !get_hexdigit(&hex, &b)) goto error; // invalid char if (a < 0) // EOF break; if (b < 0) // only one digit goto error; buf[len++] = (a << 4) | b; } *out = (pl_str) { buf, len }; return true; error: pl_free(buf); return false; } libplacebo-v4.192.1/src/pl_string.h000066400000000000000000000147521417677245700171500ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" typedef struct pl_str { uint8_t *buf; size_t len; } pl_str; // For formatting with "%.*s" #define PL_STR_FMT(str) (int)((str).len), ((str).buf ? 
(char *)((str).buf) : "") static inline pl_str pl_str0(const char *str) { return (pl_str) { .buf = (uint8_t *) str, .len = str ? strlen(str) : 0, }; } // Macro version of pl_str0, for constants #define PL_STR0(str) ((pl_str) { (uint8_t *) (str), (str) ? strlen(str) : 0 }) static inline pl_str pl_strdup(void *alloc, pl_str str) { return (pl_str) { .buf = str.len ? pl_memdup(alloc, str.buf, str.len) : NULL, .len = str.len, }; } // Always returns a valid string static inline char *pl_strdup0(void *alloc, pl_str str) { return pl_strndup0(alloc, str.len ? (char *) str.buf : "", str.len); } void pl_str_append(void *alloc, pl_str *str, pl_str append); // Locale-sensitive string functions char *pl_asprintf(void *parent, const char *fmt, ...) PL_PRINTF(2, 3); char *pl_vasprintf(void *parent, const char *fmt, va_list ap) PL_PRINTF(2, 0); void pl_str_append_asprintf(void *alloc, pl_str *str, const char *fmt, ...) PL_PRINTF(3, 4); void pl_str_append_vasprintf(void *alloc, pl_str *str, const char *fmt, va_list va) PL_PRINTF(3, 0); int pl_str_sscanf(pl_str str, const char *fmt, ...); // Locale-invariant versions of append_(v)asprintf // // NOTE: These only support a small handful of modifiers. Check `format.c` // for a list. Calling them on an invalid string will abort! void pl_str_append_asprintf_c(void *alloc, pl_str *str, const char *fmt, ...) PL_PRINTF(3, 4); void pl_str_append_vasprintf_c(void *alloc, pl_str *str, const char *fmt, va_list va) PL_PRINTF(3, 0); // Locale-invariant number parsing bool pl_str_parse_double(pl_str str, double *out); bool pl_str_parse_int64(pl_str str, int64_t *out); static inline bool pl_str_parse_float(pl_str str, float *out) { double dbl; bool ret = pl_str_parse_double(str, &dbl); *out = (float) dbl; return ret; } static inline bool pl_str_parse_int(pl_str str, int *out) { int64_t i64; bool ret = pl_str_parse_int64(str, &i64); *out = (int) i64; return ret; } // Variants of string.h functions int pl_strchr(pl_str str, int c); size_t pl_strspn(pl_str str, const char *accept); size_t pl_strcspn(pl_str str, const char *reject); // Strip leading/trailing whitespace pl_str pl_str_strip(pl_str str); // Generic functions for cutting up strings static inline pl_str pl_str_take(pl_str str, size_t len) { if (len < str.len) str.len = len; return str; } static inline pl_str pl_str_drop(pl_str str, size_t len) { if (len >= str.len) return (pl_str) {0}; str.buf += len; str.len -= len; return str; } // Find a substring in another string, and return its index (or -1) int pl_str_find(pl_str haystack, pl_str needle); // String splitting functions. These return the part of the string before // the separator, and optionally the rest (in `out_rest`). // // Note that the separator is not included as part of either string. pl_str pl_str_split_char(pl_str str, char sep, pl_str *out_rest); pl_str pl_str_split_str(pl_str str, pl_str sep, pl_str *out_rest); static inline pl_str pl_str_getline(pl_str str, pl_str *out_rest) { return pl_str_split_char(str, '\n', out_rest); } // Decode a string containing hexadecimal data. All whitespace will be silently // ignored. When successful, this allocates a new array to store the output. 
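//
// Minimal usage sketch (illustrative only; `tmp_alloc` stands in for any
// hypothetical parent allocation):
//
//     pl_str bytes;
//     if (pl_str_decode_hex(tmp_alloc, pl_str0("DE AD be ef"), &bytes)) {
//         // `bytes` now holds the four decoded octets 0xDE 0xAD 0xBE 0xEF,
//         // allocated as a child of `tmp_alloc`
//     }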
bool pl_str_decode_hex(void *alloc, pl_str hex, pl_str *out); // Compute a fast 64-bit hash uint64_t pl_mem_hash(const void *mem, size_t size); static inline void pl_hash_merge(uint64_t *accum, uint64_t hash) { *accum ^= hash + 0x9e3779b9 + (*accum << 6) + (*accum >> 2); } static inline uint64_t pl_str_hash(pl_str str) { return pl_mem_hash(str.buf, str.len); } static inline uint64_t pl_str0_hash(const char *str) { return pl_mem_hash(str, str ? strlen(str) : 0); } static inline bool pl_str_equals(pl_str str1, pl_str str2) { if (str1.len != str2.len) return false; if (str1.buf == str2.buf || !str1.len) return true; return memcmp(str1.buf, str2.buf, str1.len) == 0; } static inline bool pl_str_startswith(pl_str str, pl_str prefix) { if (!prefix.len) return true; if (str.len < prefix.len) return false; return memcmp(str.buf, prefix.buf, prefix.len) == 0; } static inline bool pl_str_endswith(pl_str str, pl_str suffix) { if (!suffix.len) return true; if (str.len < suffix.len) return false; return memcmp(str.buf + str.len - suffix.len, suffix.buf, suffix.len) == 0; } static inline bool pl_str_eatstart(pl_str *str, pl_str prefix) { if (!pl_str_startswith(*str, prefix)) return false; str->buf += prefix.len; str->len -= prefix.len; return true; } static inline bool pl_str_eatend(pl_str *str, pl_str suffix) { if (!pl_str_endswith(*str, suffix)) return false; str->len -= suffix.len; return true; } // Convenience wrappers for the above which save the use of a pl_str0 static inline pl_str pl_str_split_str0(pl_str str, const char *sep, pl_str *out_rest) { return pl_str_split_str(str, pl_str0(sep), out_rest); } static inline bool pl_str_startswith0(pl_str str, const char *prefix) { return pl_str_startswith(str, pl_str0(prefix)); } static inline bool pl_str_endswith0(pl_str str, const char *suffix) { return pl_str_endswith(str, pl_str0(suffix)); } static inline bool pl_str_equals0(pl_str str1, const char *str2) { return pl_str_equals(str1, pl_str0(str2)); } static inline bool pl_str_eatstart0(pl_str *str, const char *prefix) { return pl_str_eatstart(str, pl_str0(prefix)); } static inline bool pl_str_eatend0(pl_str *str, const char *prefix) { return pl_str_eatend(str, pl_str0(prefix)); } libplacebo-v4.192.1/src/pl_thread.h000066400000000000000000000017421417677245700171040ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #ifdef PL_HAVE_WIN32 #include "pl_thread_win32.h" #elif defined(PL_HAVE_PTHREAD) #include "pl_thread_pthread.h" #else #error No threading implementation available! #endif #define pl_mutex_init(mutex) \ pl_mutex_init_type(mutex, PL_MUTEX_NORMAL) libplacebo-v4.192.1/src/pl_thread_pthread.h000066400000000000000000000056761417677245700206250ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include typedef pthread_mutex_t pl_mutex; typedef pthread_cond_t pl_cond; enum pl_mutex_type { PL_MUTEX_NORMAL = 0, PL_MUTEX_RECURSIVE, }; static inline int pl_mutex_init_type_internal(pl_mutex *mutex, enum pl_mutex_type mtype) { int mutex_type; switch (mtype) { case PL_MUTEX_RECURSIVE: mutex_type = PTHREAD_MUTEX_RECURSIVE; break; case PL_MUTEX_NORMAL: default: #ifndef NDEBUG mutex_type = PTHREAD_MUTEX_ERRORCHECK; #else mutex_type = PTHREAD_MUTEX_DEFAULT; #endif break; } int ret = 0; pthread_mutexattr_t attr; ret = pthread_mutexattr_init(&attr); if (ret != 0) return ret; pthread_mutexattr_settype(&attr, mutex_type); ret = pthread_mutex_init(mutex, &attr); pthread_mutexattr_destroy(&attr); return ret; } #define pl_mutex_init_type(mutex, mtype) \ PL_CHECK_ERR(pl_mutex_init_type_internal(mutex, mtype)) #define pl_mutex_destroy pthread_mutex_destroy #define pl_mutex_lock pthread_mutex_lock #define pl_mutex_unlock pthread_mutex_unlock static inline int pl_cond_init(pl_cond *cond) { int ret = 0; pthread_condattr_t attr; ret = pthread_condattr_init(&attr); if (ret != 0) return ret; #ifdef PTHREAD_HAS_SETCLOCK pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); #endif ret = pthread_cond_init(cond, &attr); pthread_condattr_destroy(&attr); return ret; } #define pl_cond_destroy pthread_cond_destroy #define pl_cond_broadcast pthread_cond_broadcast #define pl_cond_signal pthread_cond_signal #define pl_cond_wait pthread_cond_wait static inline int pl_cond_timedwait(pl_cond *cond, pl_mutex *mutex, uint64_t timeout) { if (timeout == UINT64_MAX) return pthread_cond_wait(cond, mutex); struct timespec ts; #ifdef PTHREAD_HAS_SETCLOCK clock_gettime(CLOCK_MONOTONIC, &ts); #else clock_gettime(CLOCK_REALTIME, &ts); #endif ts.tv_sec += timeout / 1000000000LLU; ts.tv_nsec += timeout % 1000000000LLU; if (ts.tv_nsec > 1000000000LLU) { ts.tv_nsec -= 1000000000LLU; ts.tv_sec++; } return pthread_cond_timedwait(cond, mutex, &ts); } libplacebo-v4.192.1/src/pl_thread_win32.h000066400000000000000000000046241417677245700201300ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include #include typedef CRITICAL_SECTION pl_mutex; typedef CONDITION_VARIABLE pl_cond; enum pl_mutex_type { PL_MUTEX_NORMAL = 0, PL_MUTEX_RECURSIVE, }; static inline int pl_mutex_init_type_internal(pl_mutex *mutex, enum pl_mutex_type mtype) { return !InitializeCriticalSectionEx(mutex, 0, 0); } #define pl_mutex_init_type(mutex, mtype) \ PL_CHECK_ERR(pl_mutex_init_type_internal(mutex, mtype)) static inline int pl_mutex_destroy(pl_mutex *mutex) { DeleteCriticalSection(mutex); return 0; } static inline int pl_mutex_lock(pl_mutex *mutex) { EnterCriticalSection(mutex); return 0; } static inline int pl_mutex_unlock(pl_mutex *mutex) { LeaveCriticalSection(mutex); return 0; } static inline int pl_cond_init(pl_cond *cond) { InitializeConditionVariable(cond); return 0; } static inline int pl_cond_destroy(pl_cond *cond) { // condition variables are not destroyed return 0; } static inline int pl_cond_broadcast(pl_cond *cond) { WakeAllConditionVariable(cond); return 0; } static inline int pl_cond_signal(pl_cond *cond) { WakeConditionVariable(cond); return 0; } static inline int pl_cond_wait(pl_cond *cond, pl_mutex *mutex) { return !SleepConditionVariableCS(cond, mutex, INFINITE); } static inline int pl_cond_timedwait(pl_cond *cond, pl_mutex *mutex, uint64_t timeout) { if (timeout == UINT64_MAX) return pl_cond_wait(cond, mutex); BOOL bRet = SleepConditionVariableCS(cond, mutex, PL_MIN(timeout / 1000000LLU, INFINITE - 1)); if (bRet == FALSE) { if (GetLastError() == ERROR_TIMEOUT) return ETIMEDOUT; else return EINVAL; } return 0; } libplacebo-v4.192.1/src/renderer.c000066400000000000000000003275641417677245700167600ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "common.h" #include "filters.h" #include "shaders.h" #include "dispatch.h" struct cached_frame { uint64_t signature; uint64_t params_hash; // for detecting `pl_render_params` changes struct pl_color_space color; struct pl_icc_profile profile; pl_tex tex; int comps; bool evict; // for garbage collection }; struct sampler { pl_shader_obj upscaler_state; pl_shader_obj downscaler_state; }; struct osd_vertex { float pos[2]; float coord[2]; float color[4]; }; struct pl_renderer { pl_gpu gpu; pl_dispatch dp; pl_log log; // Texture format to use for intermediate textures pl_fmt fbofmt[5]; // Cached feature checks (inverted) bool disable_compute; // disable the use of compute shaders bool disable_sampling; // disable use of advanced scalers bool disable_debanding; // disable the use of debanding shaders bool disable_linear_hdr; // disable linear scaling for HDR signals bool disable_linear_sdr; // disable linear scaling for SDR signals bool disable_blending; // disable blending for the target/fbofmt bool disable_overlay; // disable rendering overlays bool disable_icc; // disable usage of ICC profiles bool disable_peak_detect; // disable peak detection shader bool disable_grain; // disable film grain code bool disable_hooks; // disable user hooks / custom shaders bool disable_mixing; // disable frame mixing // Shader resource objects and intermediate textures (FBOs) pl_shader_obj tone_map_state; pl_shader_obj dither_state; pl_shader_obj icc_state; pl_shader_obj grain_state[4]; pl_shader_obj lut_state[3]; PL_ARRAY(pl_tex) fbos; struct sampler sampler_main; struct sampler samplers_src[4]; struct sampler samplers_dst[4]; bool peak_detect_active; // Temporary storage for vertex/index data PL_ARRAY(struct osd_vertex) osd_vertices; PL_ARRAY(uint16_t) osd_indices; struct pl_vertex_attrib osd_attribs[3]; // Frame cache (for frame mixing / interpolation) PL_ARRAY(struct cached_frame) frames; PL_ARRAY(pl_tex) frame_fbos; }; enum { // Index into `lut_state` LUT_IMAGE, LUT_TARGET, LUT_PARAMS, }; static void find_fbo_format(pl_renderer rr) { struct { enum pl_fmt_type type; int depth; enum pl_fmt_caps caps; } configs[] = { // Prefer floating point formats first {PL_FMT_FLOAT, 16, PL_FMT_CAP_LINEAR}, {PL_FMT_FLOAT, 16, PL_FMT_CAP_SAMPLEABLE}, // Otherwise, fall back to unorm/snorm, preferring linearly sampleable {PL_FMT_UNORM, 16, PL_FMT_CAP_LINEAR}, {PL_FMT_SNORM, 16, PL_FMT_CAP_LINEAR}, {PL_FMT_UNORM, 16, PL_FMT_CAP_SAMPLEABLE}, {PL_FMT_SNORM, 16, PL_FMT_CAP_SAMPLEABLE}, // As a final fallback, allow 8-bit FBO formats (for UNORM only) {PL_FMT_UNORM, 8, PL_FMT_CAP_LINEAR}, {PL_FMT_UNORM, 8, PL_FMT_CAP_SAMPLEABLE}, }; pl_fmt fmt = NULL; for (int i = 0; i < PL_ARRAY_SIZE(configs); i++) { fmt = pl_find_fmt(rr->gpu, configs[i].type, 4, configs[i].depth, 0, PL_FMT_CAP_RENDERABLE | configs[i].caps); if (fmt) { rr->fbofmt[4] = fmt; // Probe the right variant for each number of channels, falling // back to the next biggest format for (int c = 1; c < 4; c++) { rr->fbofmt[c] = pl_find_fmt(rr->gpu, configs[i].type, c, configs[i].depth, 0, fmt->caps); rr->fbofmt[c] = PL_DEF(rr->fbofmt[c], rr->fbofmt[c+1]); } break; } } if (!fmt) { PL_WARN(rr, "Found no renderable FBO format! 
Most features disabled"); return; } if (!(fmt->caps & PL_FMT_CAP_STORABLE)) { PL_INFO(rr, "Found no storable FBO format; compute shaders disabled"); rr->disable_compute = true; } if (fmt->type != PL_FMT_FLOAT) { PL_INFO(rr, "Found no floating point FBO format; linear light " "processing disabled for HDR material"); rr->disable_linear_hdr = true; } if (fmt->component_depth[0] < 16) { PL_WARN(rr, "FBO format precision low (<16 bit); linear light " "processing disabled"); rr->disable_linear_sdr = true; } } pl_renderer pl_renderer_create(pl_log log, pl_gpu gpu) { pl_renderer rr = pl_alloc_ptr(NULL, rr); *rr = (struct pl_renderer) { .gpu = gpu, .log = log, .dp = pl_dispatch_create(log, gpu), .osd_attribs = { { .name = "pos", .offset = offsetof(struct osd_vertex, pos), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), }, { .name = "coord", .offset = offsetof(struct osd_vertex, coord), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), }, { .name = "osd_color", .offset = offsetof(struct osd_vertex, color), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 4), } }, }; assert(rr->dp); find_fbo_format(rr); return rr; } static void sampler_destroy(pl_renderer rr, struct sampler *sampler) { pl_shader_obj_destroy(&sampler->upscaler_state); pl_shader_obj_destroy(&sampler->downscaler_state); } void pl_renderer_destroy(pl_renderer *p_rr) { pl_renderer rr = *p_rr; if (!rr) return; // Free all intermediate FBOs for (int i = 0; i < rr->fbos.num; i++) pl_tex_destroy(rr->gpu, &rr->fbos.elem[i]); for (int i = 0; i < rr->frames.num; i++) pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex); for (int i = 0; i < rr->frame_fbos.num; i++) pl_tex_destroy(rr->gpu, &rr->frame_fbos.elem[i]); // Free all shader resource objects pl_shader_obj_destroy(&rr->tone_map_state); pl_shader_obj_destroy(&rr->dither_state); pl_shader_obj_destroy(&rr->icc_state); for (int i = 0; i < PL_ARRAY_SIZE(rr->lut_state); i++) pl_shader_obj_destroy(&rr->lut_state[i]); for (int i = 0; i < PL_ARRAY_SIZE(rr->grain_state); i++) pl_shader_obj_destroy(&rr->grain_state[i]); // Free all samplers sampler_destroy(rr, &rr->sampler_main); for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_src); i++) sampler_destroy(rr, &rr->samplers_src[i]); for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_dst); i++) sampler_destroy(rr, &rr->samplers_dst[i]); pl_dispatch_destroy(&rr->dp); pl_free_ptr(p_rr); } size_t pl_renderer_save(pl_renderer rr, uint8_t *out_cache) { return pl_dispatch_save(rr->dp, out_cache); } void pl_renderer_load(pl_renderer rr, const uint8_t *cache) { pl_dispatch_load(rr->dp, cache); } void pl_renderer_flush_cache(pl_renderer rr) { for (int i = 0; i < rr->frames.num; i++) pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex); rr->frames.num = 0; pl_reset_detected_peak(rr->tone_map_state); rr->peak_detect_active = false; } const struct pl_render_params pl_render_fast_params = { PL_RENDER_DEFAULTS }; const struct pl_render_params pl_render_default_params = { PL_RENDER_DEFAULTS .upscaler = &pl_filter_spline36, .downscaler = &pl_filter_mitchell, .sigmoid_params = &pl_sigmoid_default_params, .dither_params = &pl_dither_default_params, }; const struct pl_render_params pl_render_high_quality_params = { PL_RENDER_DEFAULTS .upscaler = &pl_filter_ewa_lanczos, .downscaler = &pl_filter_mitchell, .sigmoid_params = &pl_sigmoid_default_params, .peak_detect_params = &pl_peak_detect_default_params, .dither_params = &pl_dither_default_params, .deband_params = &pl_deband_default_params, }; // This is only used as a sentinel, to use the GLSL implementation static double oversample(const 
struct pl_filter_function *k, double x) { pl_unreachable(); } static const struct pl_filter_function oversample_kernel = { .weight = oversample, .tunable = {true}, .params = {0.0}, }; const struct pl_filter_config pl_filter_oversample = { .kernel = &oversample_kernel, }; const struct pl_filter_preset pl_frame_mixers[] = { { "none", NULL, "No frame mixing" }, { "oversample", &pl_filter_oversample, "Oversample (AKA SmoothMotion)" }, { "mitchell_clamp", &pl_filter_mitchell_clamp, "Cubic spline (clamped)" }, {0} }; const int pl_num_frame_mixers = PL_ARRAY_SIZE(pl_frame_mixers) - 1; const struct pl_filter_preset pl_scale_filters[] = { {"none", NULL, "Built-in sampling"}, {"oversample", &pl_filter_oversample, "Oversample (Aspect-preserving NN)"}, COMMON_FILTER_PRESETS, {0} }; const int pl_num_scale_filters = PL_ARRAY_SIZE(pl_scale_filters) - 1; #define FBOFMT(n) (params->disable_fbos ? NULL : rr->fbofmt[n]) // Represents a "in-flight" image, which is either a shader that's in the // process of producing some sort of image, or a texture that needs to be // sampled from struct img { // Effective texture size, always set int w, h; // Recommended format (falls back to FBOFMT otherwise), only for shaders pl_fmt fmt; // Exactly *one* of these two is set: pl_shader sh; pl_tex tex; // Current effective source area, will be sampled by the main scaler struct pl_rect2df rect; // The current effective colorspace struct pl_color_repr repr; struct pl_color_space color; int comps; }; // Plane 'type', ordered by incrementing priority enum plane_type { PLANE_INVALID = 0, PLANE_ALPHA, PLANE_CHROMA, PLANE_LUMA, PLANE_RGB, PLANE_XYZ, }; struct pass_state { void *tmp; pl_renderer rr; const struct pl_render_params *params; struct pl_render_info info; // for info callback // Represents the "current" image which we're in the process of rendering. // This is initially set by pass_read_image, and all of the subsequent // rendering steps will mutate this in-place. struct img img; // Represents the "reference rect". Canonically, this is functionally // equivalent to `image.crop`, but both guaranteed to be valid, and also // updates as the refplane evolves (e.g. due to user hook prescalers) struct pl_rect2df ref_rect; // Integer version of `target.crop`. Semantically identical. struct pl_rect2d dst_rect; // Logical end-to-end rotation pl_rotation rotation; // Cached copies of the `image` / `target` for this rendering pass, // corrected to make sure all rects etc. are properly defaulted/inferred. 
struct pl_frame image; struct pl_frame target; // Some extra plane metadata, inferred from `planes` enum plane_type src_type[4]; enum plane_type dst_type[4]; int src_ref, dst_ref; // index into `planes` // Metadata for `rr->fbos` bool *fbos_used; }; static void info_callback(void *priv, const struct pl_dispatch_info *dinfo) { struct pass_state *pass = priv; const struct pl_render_params *params = pass->params; if (!params->info_callback) return; pass->info.pass = dinfo; params->info_callback(params->info_priv, &pass->info); if (pass->info.stage == PL_RENDER_STAGE_FRAME) pass->info.index++; } static pl_tex get_fbo(struct pass_state *pass, int w, int h, pl_fmt fmt, int comps, pl_debug_tag debug_tag) { pl_renderer rr = pass->rr; comps = PL_DEF(comps, 4); fmt = PL_DEF(fmt, rr->fbofmt[comps]); if (!fmt) return NULL; struct pl_tex_params params = { .w = w, .h = h, .format = fmt, .sampleable = true, .renderable = true, .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE, .storable = fmt->caps & PL_FMT_CAP_STORABLE, .debug_tag = debug_tag, }; int best_idx = -1; int best_diff = 0; // Find the best-fitting texture out of rr->fbos for (int i = 0; i < rr->fbos.num; i++) { if (pass->fbos_used[i]) continue; // Orthogonal distance, with penalty for format mismatches int diff = abs(rr->fbos.elem[i]->params.w - w) + abs(rr->fbos.elem[i]->params.h - h) + ((rr->fbos.elem[i]->params.format != fmt) ? 1000 : 0); if (best_idx < 0 || diff < best_diff) { best_idx = i; best_diff = diff; } } // No texture found at all, add a new one if (best_idx < 0) { best_idx = rr->fbos.num; PL_ARRAY_APPEND(rr, rr->fbos, NULL); pl_grow(pass->tmp, &pass->fbos_used, rr->fbos.num * sizeof(bool)); pass->fbos_used[best_idx] = false; } if (!pl_tex_recreate(rr->gpu, &rr->fbos.elem[best_idx], ¶ms)) return NULL; pass->fbos_used[best_idx] = true; return rr->fbos.elem[best_idx]; } // Forcibly convert an img to `tex`, dispatching where necessary static pl_tex _img_tex(struct pass_state *pass, struct img *img, pl_debug_tag tag) { if (img->tex) { pl_assert(!img->sh); return img->tex; } pl_renderer rr = pass->rr; pl_tex tex = get_fbo(pass, img->w, img->h, img->fmt, img->comps, tag); img->fmt = NULL; if (!tex) { PL_ERR(rr, "Failed creating FBO texture! 
Disabling advanced rendering.."); memset(rr->fbofmt, 0, sizeof(rr->fbofmt)); pl_dispatch_abort(rr->dp, &img->sh); return NULL; } pl_assert(img->sh); bool ok = pl_dispatch_finish(rr->dp, pl_dispatch_params( .shader = &img->sh, .target = tex, )); if (!ok) { PL_ERR(rr, "Failed dispatching intermediate pass!"); img->sh = pl_dispatch_begin(rr->dp); return NULL; } img->tex = tex; return img->tex; } #define img_tex(pass, img) _img_tex(pass, img, PL_DEBUG_TAG) // Forcibly convert an img to `sh`, sampling where necessary static pl_shader img_sh(struct pass_state *pass, struct img *img) { if (img->sh) { pl_assert(!img->tex); return img->sh; } pl_assert(img->tex); img->sh = pl_dispatch_begin(pass->rr->dp); pl_shader_sample_direct(img->sh, pl_sample_src( .tex = img->tex )); img->tex = NULL; return img->sh; } enum sampler_type { SAMPLER_DIRECT, // pick based on texture caps SAMPLER_NEAREST, // direct sampling, force nearest SAMPLER_BICUBIC, // fast bicubic scaling SAMPLER_COMPLEX, // complex custom filters SAMPLER_OVERSAMPLE, }; enum sampler_dir { SAMPLER_NOOP, // 1:1 scaling SAMPLER_UP, // upscaling SAMPLER_DOWN, // downscaling }; struct sampler_info { const struct pl_filter_config *config; // if applicable enum sampler_type type; enum sampler_dir dir; enum sampler_dir dir_sep[2]; }; static struct sampler_info sample_src_info(struct pass_state *pass, const struct pl_sample_src *src) { const struct pl_render_params *params = pass->params; struct sampler_info info = {0}; pl_renderer rr = pass->rr; float rx = src->new_w / fabsf(pl_rect_w(src->rect)); if (rx < 1.0 - 1e-6) { info.dir_sep[0] = SAMPLER_DOWN; } else if (rx > 1.0 + 1e-6) { info.dir_sep[0] = SAMPLER_UP; } float ry = src->new_h / fabsf(pl_rect_h(src->rect)); if (ry < 1.0 - 1e-6) { info.dir_sep[1] = SAMPLER_DOWN; } else if (ry > 1.0 + 1e-6) { info.dir_sep[1] = SAMPLER_UP; } // We use PL_MAX so downscaling overrides upscaling when choosing scalers info.dir = PL_MAX(info.dir_sep[0], info.dir_sep[1]); switch (info.dir) { case SAMPLER_DOWN: info.config = params->downscaler; break; case SAMPLER_UP: info.config = params->upscaler; break; case SAMPLER_NOOP: info.type = SAMPLER_NEAREST; return info; } int comps = PL_DEF(src->components, 4); if (!FBOFMT(comps) || rr->disable_sampling || !info.config) { info.type = SAMPLER_DIRECT; } else if (info.config->kernel->weight == oversample) { info.type = SAMPLER_OVERSAMPLE; } else { info.type = SAMPLER_COMPLEX; // Try using faster replacements for GPU built-in scalers pl_fmt texfmt = src->tex ? src->tex->params.format : rr->fbofmt[comps]; bool can_linear = texfmt->caps & PL_FMT_CAP_LINEAR; bool can_fast = info.dir == SAMPLER_UP || params->skip_anti_aliasing; if (can_fast && !params->disable_builtin_scalers) { if (can_linear && info.config == &pl_filter_bicubic) info.type = SAMPLER_BICUBIC; if (can_linear && info.config == &pl_filter_bilinear) info.type = SAMPLER_DIRECT; if (info.config == &pl_filter_nearest) info.type = can_linear ? 
SAMPLER_NEAREST : SAMPLER_DIRECT; } } return info; } static void dispatch_sampler(struct pass_state *pass, pl_shader sh, struct sampler *sampler, bool no_compute, const struct pl_sample_src *src) { const struct pl_render_params *params = pass->params; if (!sampler) goto fallback; pl_renderer rr = pass->rr; struct sampler_info info = sample_src_info(pass, src); pl_shader_obj *lut = NULL; switch (info.dir) { case SAMPLER_NOOP: goto fallback; case SAMPLER_DOWN: lut = &sampler->downscaler_state; break; case SAMPLER_UP: lut = &sampler->upscaler_state; break; } switch (info.type) { case SAMPLER_DIRECT: goto fallback; case SAMPLER_NEAREST: pl_shader_sample_nearest(sh, src); return; case SAMPLER_OVERSAMPLE: pl_shader_sample_oversample(sh, src, info.config->kernel->params[0]); return; case SAMPLER_BICUBIC: pl_shader_sample_bicubic(sh, src); return; case SAMPLER_COMPLEX: break; // continue below } pl_assert(lut); struct pl_sample_filter_params fparams = { .filter = *info.config, .lut_entries = params->lut_entries, .cutoff = params->polar_cutoff, .antiring = params->antiringing_strength, .no_compute = rr->disable_compute || no_compute, .no_widening = params->skip_anti_aliasing, .lut = lut, }; bool ok; if (info.config->polar) { // Polar samplers are always a single function call ok = pl_shader_sample_polar(sh, src, &fparams); } else if (info.dir_sep[0] && info.dir_sep[1]) { // Scaling is needed in both directions pl_shader tsh = pl_dispatch_begin(rr->dp); ok = pl_shader_sample_ortho(tsh, PL_SEP_VERT, src, &fparams); if (!ok) { pl_dispatch_abort(rr->dp, &tsh); goto done; } struct img img = { .sh = tsh, .w = src->tex->params.w, .h = src->new_h, .comps = src->components, }; struct pl_sample_src src2 = *src; src2.tex = img_tex(pass, &img); src2.scale = 1.0; ok = src2.tex && pl_shader_sample_ortho(sh, PL_SEP_HORIZ, &src2, &fparams); } else if (info.dir_sep[0]) { // Scaling is needed only in the horizontal direction ok = pl_shader_sample_ortho(sh, PL_SEP_HORIZ, src, &fparams); } else { // Scaling is needed only in the vertical direction pl_assert(info.dir_sep[1]); ok = pl_shader_sample_ortho(sh, PL_SEP_VERT, src, &fparams); } done: if (!ok) { PL_ERR(rr, "Failed dispatching scaler.. disabling"); rr->disable_sampling = true; goto fallback; } return; fallback: // If all else fails, fall back to auto sampling pl_shader_sample_direct(sh, src); } static void swizzle_color(pl_shader sh, int comps, const int comp_map[4], bool force_alpha) { ident_t orig = sh_fresh(sh, "orig_color"); GLSL("vec4 %s = color; \n" "color = vec4(0.0, 0.0, 0.0, 1.0); \n", orig); static const int def_map[4] = {0, 1, 2, 3}; comp_map = PL_DEF(comp_map, def_map); for (int c = 0; c < comps; c++) { if (comp_map[c] >= 0) GLSL("color[%d] = %s[%d]; \n", c, orig, comp_map[c]); } if (force_alpha) GLSL("color.a = %s.a; \n", orig); } static void draw_overlays(struct pass_state *pass, pl_tex fbo, int comps, const int comp_map[4], const struct pl_overlay *overlays, int num, struct pl_color_space color, struct pl_color_repr repr, bool use_sigmoid, struct pl_transform2x2 *scale) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; if (num <= 0 || rr->disable_overlay) return; enum pl_fmt_caps caps = fbo->params.format->caps; if (!rr->disable_blending && !(caps & PL_FMT_CAP_BLENDABLE)) { PL_WARN(rr, "Trying to draw an overlay to a non-blendable target. 
" "Alpha blending is disabled, results may be incorrect!"); rr->disable_blending = true; } for (int n = 0; n < num; n++) { struct pl_overlay ol = overlays[n]; struct pl_overlay_part fallback; if (!ol.tex) { // Backwards compatibility ol.tex = ol.plane.texture; ol.parts = &fallback; ol.num_parts = 1; fallback = (struct pl_overlay_part) { .src = { .x0 = -ol.plane.shift_x, .y0 = -ol.plane.shift_y, .x1 = ol.tex->params.w - ol.plane.shift_x, .y1 = ol.tex->params.h - ol.plane.shift_y, }, .dst = ol.rect, .color = { ol.base_color[0], ol.base_color[1], ol.base_color[2], 1.0, }, }; } if (!ol.num_parts) continue; // Construct vertex/index buffers rr->osd_vertices.num = 0; rr->osd_indices.num = 0; for (int i = 0; i < ol.num_parts; i++) { const struct pl_overlay_part *part = &ol.parts[i]; #define EMIT_VERT(x, y) \ do { \ float pos[2] = { part->dst.x, part->dst.y }; \ if (scale) \ pl_transform2x2_apply(scale, pos); \ PL_ARRAY_APPEND(rr, rr->osd_vertices, (struct osd_vertex) { \ .pos = { \ 2.0 * (pos[0] / fbo->params.w) - 1.0, \ 2.0 * (pos[1] / fbo->params.h) - 1.0, \ }, \ .coord = { \ part->src.x / ol.tex->params.w, \ part->src.y / ol.tex->params.h, \ }, \ .color = { \ part->color[0], part->color[1], \ part->color[2], part->color[3], \ }, \ }); \ } while (0) int idx_base = rr->osd_vertices.num; EMIT_VERT(x0, y0); // idx 0: top left EMIT_VERT(x1, y0); // idx 1: top right EMIT_VERT(x0, y1); // idx 2: bottom left EMIT_VERT(x1, y1); // idx 3: bottom right PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 0); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 1); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 2); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 2); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 1); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 3); } // Draw parts pl_shader sh = pl_dispatch_begin(rr->dp); ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "osd_tex", .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = ol.tex, .sample_mode = (ol.tex->params.format->caps & PL_FMT_CAP_LINEAR) ? PL_TEX_SAMPLE_LINEAR : PL_TEX_SAMPLE_NEAREST, }, }); sh_describe(sh, "overlay"); GLSL("// overlay \n"); switch (ol.mode) { case PL_OVERLAY_NORMAL: GLSL("vec4 color = %s(%s, coord); \n", sh_tex_fn(sh, ol.tex->params), tex); break; case PL_OVERLAY_MONOCHROME: GLSL("vec4 color = osd_color; \n"); break; case PL_OVERLAY_MODE_COUNT: pl_unreachable(); }; sh->res.output = PL_SHADER_SIG_COLOR; pl_shader_decode_color(sh, &ol.repr, NULL); pl_shader_color_map(sh, params->color_map_params, ol.color, color, NULL, false); if (use_sigmoid) pl_shader_sigmoidize(sh, params->sigmoid_params); bool premul = repr.alpha == PL_ALPHA_PREMULTIPLIED; pl_shader_encode_color(sh, &repr); if (ol.mode == PL_OVERLAY_MONOCHROME) { GLSL("color.%s *= %s(%s, coord).r; \n", premul ? "rgba" : "a", sh_tex_fn(sh, ol.tex->params), tex); } swizzle_color(sh, comps, comp_map, true); struct pl_blend_params blend_params = { .src_rgb = premul ? PL_BLEND_ONE : PL_BLEND_SRC_ALPHA, .src_alpha = PL_BLEND_ONE, .dst_rgb = PL_BLEND_ONE_MINUS_SRC_ALPHA, .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA, }; bool ok = pl_dispatch_vertex(rr->dp, pl_dispatch_vertex_params( .shader = &sh, .target = fbo, .blend_params = rr->disable_blending ? NULL : &blend_params, .vertex_stride = sizeof(struct osd_vertex), .num_vertex_attribs = ol.mode == PL_OVERLAY_NORMAL ? 
2 : 3, .vertex_attribs = rr->osd_attribs, .vertex_position_idx = 0, .vertex_coords = PL_COORDS_NORMALIZED, .vertex_type = PL_PRIM_TRIANGLE_LIST, .vertex_count = rr->osd_indices.num, .vertex_data = rr->osd_vertices.elem, .index_data = rr->osd_indices.elem, )); if (!ok) { PL_ERR(rr, "Failed rendering overlays!"); rr->disable_overlay = true; return; } } } static pl_tex get_hook_tex(void *priv, int width, int height) { struct pass_state *pass = priv; return get_fbo(pass, width, height, NULL, 4, PL_DEBUG_TAG); } // Returns if any hook was applied (even if there were errors) static bool pass_hook(struct pass_state *pass, struct img *img, enum pl_hook_stage stage) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; if (!rr->fbofmt[4] || rr->disable_hooks) return false; bool ret = false; for (int n = 0; n < params->num_hooks; n++) { const struct pl_hook *hook = params->hooks[n]; if (!(hook->stages & stage)) continue; PL_TRACE(rr, "Dispatching hook %d stage 0x%x", n, stage); struct pl_hook_params hparams = { .gpu = rr->gpu, .dispatch = rr->dp, .get_tex = get_hook_tex, .priv = pass, .stage = stage, .rect = img->rect, .repr = img->repr, .color = img->color, .components = img->comps, .src_rect = pass->ref_rect, .dst_rect = pass->dst_rect, }; // TODO: Add some sort of `test` API function to the hooks that allows // us to skip having to touch the `img` state at all for no-ops switch (hook->input) { case PL_HOOK_SIG_NONE: break; case PL_HOOK_SIG_TEX: { hparams.tex = img_tex(pass, img); if (!hparams.tex) { PL_ERR(rr, "Failed dispatching shader prior to hook!"); goto error; } break; } case PL_HOOK_SIG_COLOR: hparams.sh = img_sh(pass, img); break; case PL_HOOK_SIG_COUNT: pl_unreachable(); } struct pl_hook_res res = hook->hook(hook->priv, &hparams); if (res.failed) { PL_ERR(rr, "Failed executing hook, disabling"); goto error; } bool resizable = pl_hook_stage_resizable(stage); switch (res.output) { case PL_HOOK_SIG_NONE: break; case PL_HOOK_SIG_TEX: if (!resizable) { if (res.tex->params.w != img->w || res.tex->params.h != img->h || !pl_rect2d_eq(res.rect, img->rect)) { PL_ERR(rr, "User hook tried resizing non-resizable stage!"); goto error; } } *img = (struct img) { .tex = res.tex, .repr = res.repr, .color = res.color, .comps = res.components, .rect = res.rect, .w = res.tex->params.w, .h = res.tex->params.h, }; break; case PL_HOOK_SIG_COLOR: if (!resizable) { if (res.sh->output_w != img->w || res.sh->output_h != img->h || !pl_rect2d_eq(res.rect, img->rect)) { PL_ERR(rr, "User hook tried resizing non-resizable stage!"); goto error; } } *img = (struct img) { .sh = res.sh, .repr = res.repr, .color = res.color, .comps = res.components, .rect = res.rect, .w = res.sh->output_w, .h = res.sh->output_h, }; break; case PL_HOOK_SIG_COUNT: pl_unreachable(); } // a hook was performed successfully ret = true; } return ret; error: rr->disable_hooks = true; // Make sure the state remains as valid as possible, even if the resulting // shaders might end up nonsensical, to prevent segfaults if (!img->tex && !img->sh) img->sh = pl_dispatch_begin(rr->dp); return ret; } // `deband_src` results enum { DEBAND_NOOP = 0, // no debanding was performing DEBAND_NORMAL, // debanding was performed, the plane should still be scaled DEBAND_SCALED, // debanding took care of scaling as well }; static int deband_src(struct pass_state *pass, pl_shader psh, struct pl_sample_src *psrc) { const struct pl_render_params *params = pass->params; const struct pl_frame *image = &pass->image; pl_renderer rr = pass->rr; if 
(rr->disable_debanding || !params->deband_params) return DEBAND_NOOP; if (!(psrc->tex->params.format->caps & PL_FMT_CAP_LINEAR)) { PL_WARN(rr, "Debanding requires uploaded textures to be linearly " "sampleable (params.sample_mode = PL_TEX_SAMPLE_LINEAR)! " "Disabling debanding.."); rr->disable_debanding = true; return DEBAND_NOOP; } bool deband_scales = false; pl_shader sh = psh; struct pl_sample_src src = *psrc; // Only sample/deband the relevant cut-out, but round it to the nearest // integer to avoid doing fractional scaling src.rect.x0 = floorf(src.rect.x0); src.rect.y0 = floorf(src.rect.y0); src.rect.x1 = ceilf(src.rect.x1); src.rect.y1 = ceilf(src.rect.y1); src.new_w = pl_rect_w(src.rect); src.new_h = pl_rect_h(src.rect); if (src.new_w == psrc->new_w && src.new_h == psrc->new_h && pl_rect2d_eq(src.rect, psrc->rect)) { // If there's nothing left to be done (i.e. we're already rendering // an exact integer crop without scaling), also skip the scalers deband_scales = true; } else { sh = pl_dispatch_begin_ex(rr->dp, true); } // Divide the deband grain scale by the effective current colorspace nominal // peak, to make sure the output intensity of the grain is as independent // of the source as possible, even though it happens this early in the // process (well before any linearization / output adaptation) struct pl_deband_params dparams = *params->deband_params; dparams.grain /= image->color.hdr.max_luma / PL_COLOR_SDR_WHITE; pl_shader_deband(sh, &src, &dparams); if (deband_scales) return DEBAND_SCALED; struct img img = { .sh = sh, .w = src.new_w, .h = src.new_h, .comps = src.components, }; pl_tex new = img_tex(pass, &img); if (!new) { PL_ERR(rr, "Failed dispatching debanding shader.. disabling debanding!"); rr->disable_debanding = true; return DEBAND_NOOP; } // Update the original pl_sample_src to point to the new texture psrc->tex = new; psrc->rect.x0 -= src.rect.x0; psrc->rect.y0 -= src.rect.y0; psrc->rect.x1 -= src.rect.x0; psrc->rect.y1 -= src.rect.y0; psrc->scale = 1.0; return DEBAND_NORMAL; } static void hdr_update_peak(struct pass_state *pass) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; if (!params->peak_detect_params || !pl_color_space_is_hdr(&pass->img.color)) goto cleanup; if (rr->disable_compute || rr->disable_peak_detect) goto cleanup; if (pass->img.color.hdr.max_luma <= pass->target.color.hdr.max_luma + 1e-6) goto cleanup; // no adaptation needed if (params->lut && params->lut_type == PL_LUT_CONVERSION) goto cleanup; // LUT handles tone mapping if (!FBOFMT(4) && !params->allow_delayed_peak_detect) { PL_WARN(rr, "Disabling peak detection because " "`allow_delayed_peak_detect` is false, but lack of FBOs " "forces the result to be delayed."); rr->disable_peak_detect = true; goto cleanup; } bool ok = pl_shader_detect_peak(img_sh(pass, &pass->img), pass->img.color, &rr->tone_map_state, params->peak_detect_params); if (!ok) { PL_WARN(rr, "Failed creating HDR peak detection shader.. 
disabling"); rr->disable_peak_detect = true; goto cleanup; } rr->peak_detect_active = true; return; cleanup: // No peak detection required or supported, so clean up the state to avoid // confusing it with later frames where peak detection is enabled again pl_reset_detected_peak(rr->tone_map_state); rr->peak_detect_active = false; } struct plane_state { enum plane_type type; struct pl_plane plane; struct img img; // for per-plane shaders }; static const char *plane_type_names[] = { [PLANE_INVALID] = "invalid", [PLANE_ALPHA] = "alpha", [PLANE_CHROMA] = "chroma", [PLANE_LUMA] = "luma", [PLANE_RGB] = "rgb", [PLANE_XYZ] = "xyz", }; static void log_plane_info(pl_renderer rr, const struct plane_state *st) { const struct pl_plane *plane = &st->plane; PL_TRACE(rr, " Type: %s", plane_type_names[st->type]); switch (plane->components) { case 0: PL_TRACE(rr, " Components: (none)"); break; case 1: PL_TRACE(rr, " Components: {%d}", plane->component_mapping[0]); break; case 2: PL_TRACE(rr, " Components: {%d %d}", plane->component_mapping[0], plane->component_mapping[1]); break; case 3: PL_TRACE(rr, " Components: {%d %d %d}", plane->component_mapping[0], plane->component_mapping[1], plane->component_mapping[2]); break; case 4: PL_TRACE(rr, " Components: {%d %d %d %d}", plane->component_mapping[0], plane->component_mapping[1], plane->component_mapping[2], plane->component_mapping[3]); break; } PL_TRACE(rr, " Rect: {%f %f} -> {%f %f}", st->img.rect.x0, st->img.rect.y0, st->img.rect.x1, st->img.rect.y1); PL_TRACE(rr, " Bits: %d (used) / %d (sampled), shift %d", st->img.repr.bits.color_depth, st->img.repr.bits.sample_depth, st->img.repr.bits.bit_shift); } // Returns true if grain was applied static bool plane_film_grain(struct pass_state *pass, int plane_idx, struct plane_state *st, const struct plane_state *ref, const struct pl_frame *image) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; if (rr->disable_grain) return false; struct img *img = &st->img; struct pl_plane *plane = &st->plane; struct pl_color_repr repr = st->img.repr; struct pl_film_grain_params grain_params = { .data = image->film_grain, .luma_tex = ref->plane.texture, .repr = &repr, .components = plane->components, }; switch (image->film_grain.type) { case PL_FILM_GRAIN_NONE: return false; case PL_FILM_GRAIN_H274: break; case PL_FILM_GRAIN_AV1: grain_params.luma_tex = ref->plane.texture; for (int c = 0; c < ref->plane.components; c++) { if (ref->plane.component_mapping[c] == PL_CHANNEL_Y) grain_params.luma_comp = c; } break; default: pl_unreachable(); } for (int c = 0; c < plane->components; c++) grain_params.component_mapping[c] = plane->component_mapping[c]; if (!pl_needs_film_grain(&grain_params)) return false; if (!FBOFMT(plane->components)) { PL_ERR(rr, "Film grain required but no renderable format available.. " "disabling!"); rr->disable_grain = true; return false; } grain_params.tex = img_tex(pass, img); if (!grain_params.tex) return false; img->sh = pl_dispatch_begin_ex(rr->dp, true); if (!pl_shader_film_grain(img->sh, &rr->grain_state[plane_idx], &grain_params)) { pl_dispatch_abort(rr->dp, &img->sh); rr->disable_grain = true; return false; } img->tex = NULL; if (!img_tex(pass, img)) { PL_ERR(rr, "Failed applying film grain.. 
disabling!"); pl_dispatch_abort(rr->dp, &img->sh); img->tex = grain_params.tex; rr->disable_grain = true; return false; } img->repr = repr; return true; } static const enum pl_hook_stage plane_hook_stages[] = { [PLANE_ALPHA] = PL_HOOK_ALPHA_INPUT, [PLANE_CHROMA] = PL_HOOK_CHROMA_INPUT, [PLANE_LUMA] = PL_HOOK_LUMA_INPUT, [PLANE_RGB] = PL_HOOK_RGB_INPUT, [PLANE_XYZ] = PL_HOOK_XYZ_INPUT, }; static enum pl_lut_type guess_frame_lut_type(const struct pl_frame *frame, bool reversed) { if (!frame->lut) return PL_LUT_UNKNOWN; if (frame->lut_type) return frame->lut_type; enum pl_color_system sys_in = frame->lut->repr_in.sys; enum pl_color_system sys_out = frame->lut->repr_out.sys; if (reversed) PL_SWAP(sys_in, sys_out); if (sys_in == PL_COLOR_SYSTEM_RGB && sys_out == sys_in) return PL_LUT_NORMALIZED; if (sys_in == frame->repr.sys && sys_out == PL_COLOR_SYSTEM_RGB) return PL_LUT_CONVERSION; // Unknown, just fall back to the default return PL_LUT_NATIVE; } static pl_fmt merge_fmt(pl_renderer rr, const struct img *a, const struct img *b) { pl_fmt fmta = a->tex ? a->tex->params.format : a->fmt; pl_fmt fmtb = b->tex->params.format; pl_assert(fmta && fmtb); if (fmta->type != fmtb->type) return NULL; int num_comps = PL_MIN(4, a->comps + b->comps); int min_depth = PL_MAX(a->repr.bits.sample_depth, b->repr.bits.sample_depth); // Only return formats that support all relevant caps of both formats const enum pl_fmt_caps mask = PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR; enum pl_fmt_caps req_caps = (fmta->caps & mask) | (fmtb->caps & mask); return pl_find_fmt(rr->gpu, fmta->type, num_comps, min_depth, 0, req_caps); } // Applies a series of rough heuristics to figure out whether we expect any // performance gains from plane merging. This is basically a series of checks // for operations that we *know* benefit from merged planes static bool want_merge(struct pass_state *pass, const struct plane_state *st, const struct plane_state *ref) { const struct pl_render_params *params = pass->params; const pl_renderer rr = pass->rr; if (!rr->fbofmt[4]) return false; // Debanding if (!rr->disable_debanding && params->deband_params) return true; // Other plane hooks, which are generally nontrivial enum pl_hook_stage stage = plane_hook_stages[st->type]; for (int i = 0; i < params->num_hooks; i++) { if (params->hooks[i]->stages & stage) return true; } // Non-trivial scaling struct pl_sample_src src = { .new_w = ref->img.w, .new_h = ref->img.h, .rect = { .x1 = st->img.w, .y1 = st->img.h, }, }; struct sampler_info info = sample_src_info(pass, &src); if (info.type == SAMPLER_COMPLEX) return true; // Film grain synthesis, can be merged for compatible channels, saving on // redundant sampling of the grain/offset textures struct pl_film_grain_params grain_params = { .data = pass->image.film_grain, .repr = (struct pl_color_repr *) &st->img.repr, .components = st->plane.components, }; for (int c = 0; c < st->plane.components; c++) grain_params.component_mapping[c] = st->plane.component_mapping[c]; if (!rr->disable_grain && pl_needs_film_grain(&grain_params)) return true; return false; } // This scales and merges all of the source images, and initializes pass->img. 
static bool pass_read_image(struct pass_state *pass) { const struct pl_render_params *params = pass->params; struct pl_frame *image = &pass->image; pl_renderer rr = pass->rr; struct plane_state planes[4]; struct plane_state *ref = &planes[pass->src_ref]; for (int i = 0; i < image->num_planes; i++) { planes[i] = (struct plane_state) { .type = pass->src_type[i], .plane = image->planes[i], .img = { .w = image->planes[i].texture->params.w, .h = image->planes[i].texture->params.h, .tex = image->planes[i].texture, .repr = image->repr, .color = image->color, .comps = image->planes[i].components, }, }; } // Original ref texture, even after preprocessing pl_tex ref_tex = ref->plane.texture; // Merge all compatible planes into 'combined' shaders for (int i = 0; i < image->num_planes; i++) { struct plane_state *sti = &planes[i]; if (!sti->type) continue; if (!want_merge(pass, sti, ref)) continue; for (int j = i+1; j < image->num_planes; j++) { struct plane_state *stj = &planes[j]; bool merge = sti->type == stj->type && sti->img.w == stj->img.w && sti->img.h == stj->img.h && sti->plane.shift_x == stj->plane.shift_x && sti->plane.shift_y == stj->plane.shift_y; if (!merge) continue; pl_fmt fmt = merge_fmt(rr, &sti->img, &stj->img); if (!fmt) continue; PL_TRACE(rr, "Merging plane %d into plane %d", j, i); pl_shader sh = sti->img.sh; if (!sh) { sh = sti->img.sh = pl_dispatch_begin_ex(pass->rr->dp, true); sh_describe(sh, "merging planes"); GLSL("vec4 tmp; \n"); pl_shader_sample_direct(sh, pl_sample_src( .tex = sti->img.tex )); sti->img.tex = NULL; } pl_shader psh = pl_dispatch_begin_ex(pass->rr->dp, true); pl_shader_sample_direct(psh, pl_sample_src( .tex = stj->img.tex )); ident_t sub = sh_subpass(sh, psh); pl_dispatch_abort(rr->dp, &psh); if (!sub) break; // skip merging GLSL("tmp = %s(); \n", sub); for (int jc = 0; jc < stj->img.comps; jc++) { int map = stj->plane.component_mapping[jc]; if (!map) continue; int ic = sti->img.comps++; pl_assert(ic < 4); GLSL("color[%d] = tmp[%d]; \n", ic, jc); sti->plane.components = sti->img.comps; sti->plane.component_mapping[ic] = map; } sti->img.fmt = fmt; *stj = (struct plane_state) {0}; } if (!img_tex(pass, &sti->img)) { PL_ERR(rr, "Failed dispatching plane merging shader, disabling FBOs!"); memset(rr->fbofmt, 0, sizeof(rr->fbofmt)); return false; } } // Compute the sampling rc of each plane for (int i = 0; i < image->num_planes; i++) { struct plane_state *st = &planes[i]; if (!st->type) continue; float rx = (float) ref_tex->params.w / st->plane.texture->params.w, ry = (float) ref_tex->params.h / st->plane.texture->params.h; // Only accept integer scaling ratios. This accounts for the fact that // fractionally subsampled planes get rounded up to the nearest integer // size, which we want to discard. float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx), rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry); float sx = st->plane.shift_x, sy = st->plane.shift_y; st->img.rect = (struct pl_rect2df) { .x0 = (image->crop.x0 - sx) / rrx, .y0 = (image->crop.y0 - sy) / rry, .x1 = (image->crop.x1 - sx) / rrx, .y1 = (image->crop.y1 - sy) / rry, }; PL_TRACE(rr, "Plane %d:", i); log_plane_info(rr, st); // Perform film grain synthesis if needed. 
Do this first because it // requires unmodified plane sizes, and also because it's closer to the // intent of the spec (which is to apply synthesis effectively during // decoding) if (plane_film_grain(pass, i, st, ref, image)) { PL_TRACE(rr, "After film grain:"); log_plane_info(rr, st); } if (pass_hook(pass, &st->img, plane_hook_stages[st->type])) { PL_TRACE(rr, "After user hooks:"); log_plane_info(rr, st); } // Update the conceptual width/height after applying plane shaders st->img.w = roundf(pl_rect_w(st->img.rect)); st->img.h = roundf(pl_rect_h(st->img.rect)); } pl_shader sh = pl_dispatch_begin_ex(rr->dp, true); sh_require(sh, PL_SHADER_SIG_NONE, 0, 0); // Initialize the color to black const char *neutral = "0.0, 0.0, 0.0"; if (pl_color_system_is_ycbcr_like(image->repr.sys)) neutral = "0.0, 0.5, 0.5"; GLSL("vec4 color = vec4(%s, 1.0); \n" "// pass_read_image \n" "{ \n" "vec4 tmp; \n", neutral); // For quality reasons, explicitly drop subpixel offsets from the ref rect // and re-add them as part of `pass->img.rect`, always rounding towards 0. // Additionally, drop anamorphic subpixel mismatches. float off_x = ref->img.rect.x0 - truncf(ref->img.rect.x0), off_y = ref->img.rect.y0 - truncf(ref->img.rect.y0), stretch_x = roundf(pl_rect_w(ref->img.rect)) / pl_rect_w(ref->img.rect), stretch_y = roundf(pl_rect_h(ref->img.rect)) / pl_rect_h(ref->img.rect); for (int i = 0; i < image->num_planes; i++) { struct plane_state *st = &planes[i]; const struct pl_plane *plane = &st->plane; if (!st->type) continue; float scale_x = pl_rect_w(st->img.rect) / pl_rect_w(ref->img.rect), scale_y = pl_rect_h(st->img.rect) / pl_rect_h(ref->img.rect), base_x = st->img.rect.x0 - scale_x * off_x, base_y = st->img.rect.y0 - scale_y * off_y; struct pl_sample_src src = { .tex = st->img.tex, .components = plane->components, .address_mode = plane->address_mode, .scale = pl_color_repr_normalize(&st->img.repr), .new_w = ref->img.w, .new_h = ref->img.h, .rect = { base_x, base_y, base_x + stretch_x * pl_rect_w(st->img.rect), base_y + stretch_y * pl_rect_h(st->img.rect), }, }; PL_TRACE(rr, "Aligning plane %d: {%f %f %f %f} -> {%f %f %f %f}", i, st->img.rect.x0, st->img.rect.y0, st->img.rect.x1, st->img.rect.y1, src.rect.x0, src.rect.y0, src.rect.x1, src.rect.y1); pl_shader psh = pl_dispatch_begin_ex(rr->dp, true); if (deband_src(pass, psh, &src) != DEBAND_SCALED) dispatch_sampler(pass, psh, &rr->samplers_src[i], false, &src); ident_t sub = sh_subpass(sh, psh); if (!sub) { // Can't merge shaders, so instead force FBO indirection here struct img inter_img = { .sh = psh, .w = ref->img.w, .h = ref->img.h, .comps = src.components, }; pl_tex inter_tex = img_tex(pass, &inter_img); if (!inter_tex) { PL_ERR(rr, "Failed dispatching subpass for plane.. disabling " "all plane shaders"); rr->disable_sampling = true; rr->disable_debanding = true; rr->disable_grain = true; pl_dispatch_abort(rr->dp, &sh); return false; } psh = pl_dispatch_begin_ex(rr->dp, true); pl_shader_sample_direct(psh, pl_sample_src( .tex = inter_tex )); sub = sh_subpass(sh, psh); pl_assert(sub); } GLSL("tmp = %s();\n", sub); for (int c = 0; c < src.components; c++) { if (plane->component_mapping[c] < 0) continue; GLSL("color[%d] = tmp[%d];\n", plane->component_mapping[c], c); } // we don't need it anymore pl_dispatch_abort(rr->dp, &psh); } GLSL("}\n"); pass->img = (struct img) { .sh = sh, .w = ref->img.w, .h = ref->img.h, .repr = ref->img.repr, .color = image->color, .comps = ref->img.repr.alpha ? 
4 : 3, .rect = { off_x, off_y, off_x + pl_rect_w(ref->img.rect) / stretch_x, off_y + pl_rect_h(ref->img.rect) / stretch_y, }, }; // Update the reference rect to our adjusted image coordinates pass->ref_rect = pass->img.rect; pass_hook(pass, &pass->img, PL_HOOK_NATIVE); // Apply LUT logic and colorspace conversion enum pl_lut_type lut_type = guess_frame_lut_type(image, false); sh = img_sh(pass, &pass->img); bool needs_conversion = true; if (lut_type == PL_LUT_NATIVE || lut_type == PL_LUT_CONVERSION) { // Fix bit depth normalization before applying LUT float scale = pl_color_repr_normalize(&pass->img.repr); GLSL("color *= vec4(%s); \n", SH_FLOAT(scale)); pl_shader_set_alpha(sh, &pass->img.repr, PL_ALPHA_INDEPENDENT); pl_shader_custom_lut(sh, image->lut, &rr->lut_state[LUT_IMAGE]); if (lut_type == PL_LUT_CONVERSION) { pass->img.repr.sys = PL_COLOR_SYSTEM_RGB; pass->img.repr.levels = PL_COLOR_LEVELS_FULL; needs_conversion = false; } } if (needs_conversion) pl_shader_decode_color(sh, &pass->img.repr, params->color_adjustment); if (lut_type == PL_LUT_NORMALIZED) pl_shader_custom_lut(sh, image->lut, &rr->lut_state[LUT_IMAGE]); // Pre-multiply alpha channel before the rest of the pipeline, to avoid // bleeding colors from transparent regions into non-transparent regions pl_shader_set_alpha(sh, &pass->img.repr, PL_ALPHA_PREMULTIPLIED); pass_hook(pass, &pass->img, PL_HOOK_RGB); sh = NULL; // HDR peak detection, do this as early as possible hdr_update_peak(pass); return true; } static bool pass_scale_main(struct pass_state *pass) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; if (!FBOFMT(pass->img.comps)) { PL_TRACE(rr, "Skipping main scaler (no FBOs)"); return true; } struct img *img = &pass->img; struct pl_sample_src src = { .components = img->comps, .new_w = abs(pl_rect_w(pass->dst_rect)), .new_h = abs(pl_rect_h(pass->dst_rect)), .rect = img->rect, }; const struct pl_frame *image = &pass->image; bool need_fbo = image->num_overlays > 0; need_fbo |= rr->peak_detect_active && !params->allow_delayed_peak_detect; // Force FBO indirection if this shader is non-resizable int out_w, out_h; if (img->sh && pl_shader_output_size(img->sh, &out_w, &out_h)) need_fbo |= out_w != src.new_w || out_h != src.new_h; struct sampler_info info = sample_src_info(pass, &src); bool use_sigmoid = info.dir == SAMPLER_UP && params->sigmoid_params; bool use_linear = info.dir == SAMPLER_DOWN; // We need to enable the full rendering pipeline if there are any user // shaders / hooks that might depend on it. 
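// For example, a PL_HOOK_LINEAR hook forces linearization, a PL_HOOK_SIGMOID
// hook additionally forces sigmoidization, and any hook on these or on the
// PRE_OVERLAY / PRE_KERNEL / POST_KERNEL stages forces FBO indirection even
// when the scaler itself could otherwise be skipped.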
uint64_t scaling_hooks = PL_HOOK_PRE_OVERLAY | PL_HOOK_PRE_KERNEL | PL_HOOK_POST_KERNEL; uint64_t linear_hooks = PL_HOOK_LINEAR | PL_HOOK_SIGMOID; for (int i = 0; i < params->num_hooks; i++) { if (params->hooks[i]->stages & (scaling_hooks | linear_hooks)) { need_fbo = true; if (params->hooks[i]->stages & linear_hooks) use_linear = true; if (params->hooks[i]->stages & PL_HOOK_SIGMOID) use_sigmoid = true; } } if (info.dir == SAMPLER_NOOP && !need_fbo) { pl_assert(src.new_w == img->w && src.new_h == img->h); PL_TRACE(rr, "Skipping main scaler (would be no-op)"); return true; } if (info.type == SAMPLER_DIRECT && !need_fbo) { img->w = src.new_w; img->h = src.new_h; PL_TRACE(rr, "Skipping main scaler (free sampling)"); return true; } // Hard-disable both sigmoidization and linearization when required if (params->disable_linear_scaling || rr->disable_linear_sdr) use_sigmoid = use_linear = false; // Avoid sigmoidization for HDR content because it clips to [0,1] if (pl_color_space_is_hdr(&img->color)) { use_sigmoid = false; // Also disable linearization if necessary if (rr->disable_linear_hdr) use_linear = false; } if (use_linear || use_sigmoid) { pl_shader_linearize(img_sh(pass, img), &img->color); img->color.transfer = PL_COLOR_TRC_LINEAR; pass_hook(pass, img, PL_HOOK_LINEAR); } if (use_sigmoid) { pl_shader_sigmoidize(img_sh(pass, img), params->sigmoid_params); pass_hook(pass, img, PL_HOOK_SIGMOID); } pass_hook(pass, img, PL_HOOK_PRE_OVERLAY); img->tex = img_tex(pass, img); if (!img->tex) return false; // Draw overlay on top of the intermediate image if needed, accounting // for possible stretching needed due to mismatch between the ref and src struct pl_transform2x2 tf = pl_transform2x2_identity; if (!pl_rect2d_eq(img->rect, image->crop)) { float rx = pl_rect_w(img->rect) / pl_rect_w(image->crop), ry = pl_rect_h(img->rect) / pl_rect_h(image->crop); tf = (struct pl_transform2x2) { .mat = {{{ rx, 0.0 }, { 0.0, ry }}}, .c = { img->rect.x0 - image->crop.x0 * rx, img->rect.y0 - image->crop.y0 * ry }, }; } draw_overlays(pass, img->tex, img->comps, NULL, image->overlays, image->num_overlays, img->color, img->repr, use_sigmoid, &tf); pass_hook(pass, img, PL_HOOK_PRE_KERNEL); src.tex = img_tex(pass, img); pl_shader sh = pl_dispatch_begin_ex(rr->dp, true); dispatch_sampler(pass, sh, &rr->sampler_main, false, &src); *img = (struct img) { .sh = sh, .w = src.new_w, .h = src.new_h, .repr = img->repr, .rect = { 0, 0, src.new_w, src.new_h }, .color = img->color, .comps = img->comps, }; pass_hook(pass, img, PL_HOOK_POST_KERNEL); if (use_sigmoid) pl_shader_unsigmoidize(img_sh(pass, img), params->sigmoid_params); pass_hook(pass, img, PL_HOOK_SCALED); return true; } #define CLEAR_COL(params) \ (float[4]) { \ (params)->background_color[0], \ (params)->background_color[1], \ (params)->background_color[2], \ 1.0 - (params)->background_transparency, \ } static bool pass_output_target(struct pass_state *pass) { const struct pl_render_params *params = pass->params; const struct pl_frame *image = &pass->image; const struct pl_frame *target = &pass->target; pl_renderer rr = pass->rr; struct img *img = &pass->img; pl_shader sh = img_sh(pass, img); // Color management bool prelinearized = false; bool need_conversion = true; assert(image->color.primaries == img->color.primaries); if (img->color.transfer == PL_COLOR_TRC_LINEAR) { if (img->repr.alpha == PL_ALPHA_PREMULTIPLIED) { // Very annoying edge case: since prelinearization happens with // premultiplied alpha, but color mapping happens with independent // alpha, we need to
go back to non-linear representation *before* // alpha mode conversion, to avoid distortion img->color.transfer = image->color.transfer; pl_shader_delinearize(sh, &img->color); } else { prelinearized = true; } } // Do all processing in independent alpha, to avoid nonlinear distortions pl_shader_set_alpha(sh, &img->repr, PL_ALPHA_INDEPENDENT); bool need_icc = !params->ignore_icc_profiles && (image->profile.data || target->profile.data) && !pl_icc_profile_equal(&image->profile, &target->profile); if (params->force_icc_lut || params->force_3dlut) need_icc |= !pl_color_space_equal(&image->color, &target->color); need_icc &= !rr->disable_icc; if (params->lut) { struct pl_color_space lut_in = params->lut->color_in; struct pl_color_space lut_out = params->lut->color_out; switch (params->lut_type) { case PL_LUT_UNKNOWN: case PL_LUT_NATIVE: pl_color_space_merge(&lut_in, &image->color); pl_color_space_merge(&lut_out, &image->color); break; case PL_LUT_CONVERSION: pl_color_space_merge(&lut_in, &image->color); pl_color_space_merge(&lut_out, &target->color); // Conversion LUT the highest priority need_icc = false; need_conversion = false; break; case PL_LUT_NORMALIZED: if (!prelinearized) { // PL_LUT_NORMALIZED wants linear input data pl_shader_linearize(sh, &img->color); img->color.transfer = PL_COLOR_TRC_LINEAR; prelinearized = true; } pl_color_space_merge(&lut_in, &img->color); pl_color_space_merge(&lut_out, &img->color); break; } pl_shader_color_map(sh, params->color_map_params, image->color, lut_in, NULL, prelinearized); if (params->lut_type == PL_LUT_NORMALIZED) { GLSLF("color.rgb *= vec3(1.0/%s); \n", SH_FLOAT(pl_color_transfer_nominal_peak(lut_in.transfer))); } pl_shader_custom_lut(sh, params->lut, &rr->lut_state[LUT_PARAMS]); if (params->lut_type == PL_LUT_NORMALIZED) { GLSLF("color.rgb *= vec3(%s); \n", SH_FLOAT(pl_color_transfer_nominal_peak(lut_out.transfer))); } if (params->lut_type != PL_LUT_CONVERSION) { pl_shader_color_map(sh, params->color_map_params, lut_out, img->color, NULL, false); } } #ifdef PL_HAVE_LCMS if (need_icc) { struct pl_icc_color_space src = { .color = image->color, .profile = image->profile, }; struct pl_icc_color_space dst = { .color = target->color, .profile = target->profile, }; if (params->ignore_icc_profiles) src.profile = dst.profile = (struct pl_icc_profile) {0}; struct pl_icc_result res; bool ok = pl_icc_update(sh, &src, &dst, &rr->icc_state, &res, PL_DEF(params->icc_params, params->lut3d_params)); if (!ok) { rr->disable_icc = true; goto fallback; } // current -> ICC in pl_shader_color_map(sh, params->color_map_params, image->color, res.src_color, &rr->tone_map_state, prelinearized); // ICC in -> ICC out pl_icc_apply(sh, &rr->icc_state); // ICC out -> target pl_shader_color_map(sh, params->color_map_params, res.dst_color, target->color, NULL, false); need_conversion = false; } fallback: #else // !PL_HAVE_LCMS if (need_icc) { PL_WARN(rr, "An ICC profile was set, but libplacebo is built without " "support for LittleCMS! 
Disabling.."); rr->disable_icc = true; } #endif if (need_conversion) { // current -> target pl_shader_color_map(sh, params->color_map_params, image->color, target->color, &rr->tone_map_state, prelinearized); } // Apply color blindness simulation if requested if (params->cone_params) pl_shader_cone_distort(sh, target->color, params->cone_params); enum pl_lut_type lut_type = guess_frame_lut_type(target, true); if (lut_type == PL_LUT_NORMALIZED || lut_type == PL_LUT_CONVERSION) pl_shader_custom_lut(sh, target->lut, &rr->lut_state[LUT_TARGET]); bool need_blend = params->blend_against_tiles || !target->repr.alpha; if (img->comps == 4 && need_blend) { if (params->blend_against_tiles) { static const float zero[2][3] = {0}; const float (*color)[3] = params->tile_colors; if (memcmp(color, zero, sizeof(zero)) == 0) color = pl_render_default_params.tile_colors; int size = PL_DEF(params->tile_size, pl_render_default_params.tile_size); GLSLH("#define bg_tile_a vec3(%s, %s, %s) \n", SH_FLOAT(color[0][0]), SH_FLOAT(color[0][1]), SH_FLOAT(color[0][2])); GLSLH("#define bg_tile_b vec3(%s, %s, %s) \n", SH_FLOAT(color[1][0]), SH_FLOAT(color[1][1]), SH_FLOAT(color[1][2])); GLSL("%s tile = lessThan(fract(gl_FragCoord.xy * %s), vec2(0.5)); \n" "vec3 bg_color = tile.x == tile.y ? bg_tile_a : bg_tile_b; \n", sh_bvec(sh, 2), SH_FLOAT(1.0 / size)); } else { GLSLH("#define bg_color vec3(%s, %s, %s) \n", SH_FLOAT(params->background_color[0]), SH_FLOAT(params->background_color[1]), SH_FLOAT(params->background_color[2])); } pl_assert(img->repr.alpha != PL_ALPHA_PREMULTIPLIED); GLSL("color = vec4(mix(bg_color, color.rgb, color.a), 1.0); \n"); img->repr.alpha = PL_ALPHA_UNKNOWN; img->comps = 3; } // Apply the color scale separately, after encoding is done, to make sure // that the intermediate FBO (if any) has the correct precision. struct pl_color_repr repr = target->repr; float scale = pl_color_repr_normalize(&repr); if (lut_type != PL_LUT_CONVERSION) pl_shader_encode_color(sh, &repr); if (lut_type == PL_LUT_NATIVE) { pl_shader_custom_lut(sh, target->lut, &rr->lut_state[LUT_TARGET]); pl_shader_set_alpha(sh, &img->repr, PL_ALPHA_PREMULTIPLIED); } // Rotation handling struct pl_rect2d dst_rect = pass->dst_rect; if (pass->rotation % PL_ROTATION_180 == PL_ROTATION_90) { PL_SWAP(dst_rect.x0, dst_rect.y0); PL_SWAP(dst_rect.x1, dst_rect.y1); PL_SWAP(img->w, img->h); sh->transpose = true; } pass_hook(pass, img, PL_HOOK_OUTPUT); sh = NULL; const struct pl_plane *ref = &target->planes[pass->dst_ref]; bool flipped_x = dst_rect.x1 < dst_rect.x0, flipped_y = dst_rect.y1 < dst_rect.y0; if (!params->skip_target_clearing && pl_frame_is_cropped(target)) pl_frame_clear_rgba(rr->gpu, target, CLEAR_COL(params)); for (int p = 0; p < target->num_planes; p++) { const struct pl_plane *plane = &target->planes[p]; float rx = (float) plane->texture->params.w / ref->texture->params.w, ry = (float) plane->texture->params.h / ref->texture->params.h; // Only accept integer scaling ratios. This accounts for the fact // that fractionally subsampled planes get rounded up to the // nearest integer size, which we want to over-render. float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx), rry = ry >= 1 ? 
roundf(ry) : 1.0 / roundf(1.0 / ry); float sx = plane->shift_x, sy = plane->shift_y; struct pl_rect2df dst_rectf = { .x0 = (dst_rect.x0 - sx) * rrx, .y0 = (dst_rect.y0 - sy) * rry, .x1 = (dst_rect.x1 - sx) * rrx, .y1 = (dst_rect.y1 - sy) * rry, }; // Normalize to make the math easier pl_rect2df_normalize(&dst_rectf); // Round the output rect int rx0 = floorf(dst_rectf.x0), ry0 = floorf(dst_rectf.y0), rx1 = ceilf(dst_rectf.x1), ry1 = ceilf(dst_rectf.y1); PL_TRACE(rr, "Subsampled target %d: {%f %f %f %f} -> {%d %d %d %d}", p, dst_rectf.x0, dst_rectf.y0, dst_rectf.x1, dst_rectf.y1, rx0, ry0, rx1, ry1); if (target->num_planes > 1) { // Planar output, so we need to sample from an intermediate FBO struct pl_sample_src src = { .tex = img_tex(pass, img), .new_w = rx1 - rx0, .new_h = ry1 - ry0, .rect = { .x0 = (rx0 - dst_rectf.x0) / rrx, .x1 = (rx1 - dst_rectf.x0) / rrx, .y0 = (ry0 - dst_rectf.y0) / rry, .y1 = (ry1 - dst_rectf.y0) / rry, }, }; if (!src.tex) { PL_ERR(rr, "Output requires multiple planes, but FBOs are " "unavailable. This combination is unsupported."); return false; } PL_TRACE(rr, "Sampling %dx%d img aligned from {%f %f %f %f}", pass->img.w, pass->img.h, src.rect.x0, src.rect.y0, src.rect.x1, src.rect.y1); for (int c = 0; c < plane->components; c++) { if (plane->component_mapping[c] < 0) continue; src.component_mask |= 1 << plane->component_mapping[c]; } sh = pl_dispatch_begin(rr->dp); dispatch_sampler(pass, sh, &rr->samplers_dst[p], !plane->texture->params.storable, &src); } else { // Single plane, so we can directly re-use the img shader unless // it's incompatible with the FBO capabilities bool is_comp = pl_shader_is_compute(img_sh(pass, img)); if (is_comp && !plane->texture->params.storable) { if (!img_tex(pass, img)) { PL_ERR(rr, "Rendering requires compute shaders, but output " "is not storable, and FBOs are unavailable. This " "combination is unsupported."); return false; } } sh = img_sh(pass, img); img->sh = NULL; } if (params->dither_params) { // Ignore dithering for > 16-bit FBOs by default, since it makes // little sense to do so (and probably just adds errors) int depth = repr.bits.sample_depth; if (depth && (depth <= 16 || params->force_dither)) pl_shader_dither(sh, depth, &rr->dither_state, params->dither_params); } GLSL("color *= vec4(1.0 / %s); \n", SH_FLOAT(scale)); swizzle_color(sh, plane->components, plane->component_mapping, false); bool ok = pl_dispatch_finish(rr->dp, pl_dispatch_params( .shader = &sh, .target = plane->texture, .blend_params = params->blend_params, .rect = { .x0 = flipped_x ? rx1 : rx0, .y0 = flipped_y ? ry1 : ry0, .x1 = flipped_x ? rx0 : rx1, .y1 = flipped_y ? ry0 : ry1, }, )); if (!ok) return false; // Render any overlays, including overlays that need to be rendered // from the `image` itself, but which couldn't be rendered as // part of the intermediate scaling pass due to missing FBOs. if (image->num_overlays > 0 && !FBOFMT(img->comps)) { // The original image dimensions need to be scaled by the effective // end-to-end scaling ratio to compensate for the mismatch in // pixel coordinates between the image and target. float scale_x = pl_rect_w(dst_rectf) / pl_rect_w(image->crop), scale_y = pl_rect_h(dst_rectf) / pl_rect_h(image->crop); struct pl_transform2x2 iscale = { .mat = {{{ scale_x, 0.0 }, { 0.0, scale_y }}}, .c = { // If the image was rendered with an offset relative to the // target crop, we also need to shift the overlays. 
dst_rectf.x0 - image->crop.x0 * scale_x, dst_rectf.y0 - image->crop.y0 * scale_y, }, }; draw_overlays(pass, plane->texture, plane->components, plane->component_mapping, image->overlays, image->num_overlays, target->color, target->repr, false, &iscale); } struct pl_transform2x2 tscale = { .mat = {{{ rrx, 0.0 }, { 0.0, rry }}}, .c = { -sx, -sy }, }; draw_overlays(pass, plane->texture, plane->components, plane->component_mapping, target->overlays, target->num_overlays, target->color, target->repr, false, &tscale); } *img = (struct img) {0}; return true; } #define require(expr) \ do { \ if (!(expr)) { \ PL_ERR(rr, "Validation failed: %s (%s:%d)", \ #expr, __FILE__, __LINE__); \ pl_log_stack_trace(rr->log, PL_LOG_ERR); \ return false; \ } \ } while (0) #define validate_plane(plane, param) \ do { \ require((plane).texture); \ require((plane).texture->params.param); \ require((plane).components > 0 && (plane).components <= 4); \ for (int c = 0; c < (plane).components; c++) { \ require((plane).component_mapping[c] >= PL_CHANNEL_NONE && \ (plane).component_mapping[c] <= PL_CHANNEL_A); \ } \ } while (0) #define validate_overlay(overlay) \ do { \ require(!(overlay).tex ^ !(overlay).plane.texture); \ if ((overlay).tex) { \ require((overlay).tex->params.sampleable); \ require((overlay).num_parts >= 0); \ for (int n = 0; n < (overlay).num_parts; n++) { \ const struct pl_overlay_part *p = &(overlay).parts[n]; \ require(pl_rect_w(p->dst) && pl_rect_h(p->dst)); \ } \ } else { \ require((overlay).num_parts == 0); \ require((overlay).plane.texture->params.sampleable); \ require(pl_rect_w((overlay).rect) && pl_rect_h((overlay).rect)); \ } \ } while (0) // Perform some basic validity checks on incoming structs to help catch invalid // API usage. This is not an exhaustive check. In particular, enums are not // bounds checked. This is because most functions accepting enums already // abort() in the default case, and because it's not the intent of this check // to catch all instances of memory corruption - just common logic bugs. static bool validate_structs(pl_renderer rr, const struct pl_frame *image, const struct pl_frame *target) { // Rendering to/from a frame with no planes is technically allowed, but so // pointless that it's more likely to be a user error worth catching. 
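// Note that require() does not abort the process: it logs the failed
// expression together with a stack trace and makes the validation return
// false, so the offending render call is simply rejected.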
require(image->num_planes > 0 && image->num_planes <= PL_MAX_PLANES); require(target->num_planes > 0 && target->num_planes <= PL_MAX_PLANES); for (int i = 0; i < image->num_planes; i++) validate_plane(image->planes[i], sampleable); for (int i = 0; i < target->num_planes; i++) validate_plane(target->planes[i], renderable); float src_w = pl_rect_w(image->crop), src_h = pl_rect_h(image->crop); float dst_w = pl_rect_w(target->crop), dst_h = pl_rect_h(target->crop); require(!src_w == !src_h); require(!dst_w == !dst_h); require(image->num_overlays >= 0); require(target->num_overlays >= 0); for (int i = 0; i < image->num_overlays; i++) validate_overlay(image->overlays[i]); for (int i = 0; i < target->num_overlays; i++) validate_overlay(target->overlays[i]); return true; } static inline enum plane_type detect_plane_type(const struct pl_plane *plane, const struct pl_color_repr *repr) { if (pl_color_system_is_ycbcr_like(repr->sys)) { int t = PLANE_INVALID; for (int c = 0; c < plane->components; c++) { switch (plane->component_mapping[c]) { case PL_CHANNEL_Y: t = PL_MAX(t, PLANE_LUMA); continue; case PL_CHANNEL_A: t = PL_MAX(t, PLANE_ALPHA); continue; case PL_CHANNEL_CB: case PL_CHANNEL_CR: t = PL_MAX(t, PLANE_CHROMA); continue; default: continue; } } pl_assert(t); return t; } // Extra test for exclusive / separated alpha plane if (plane->components == 1 && plane->component_mapping[0] == PL_CHANNEL_A) return PLANE_ALPHA; switch (repr->sys) { case PL_COLOR_SYSTEM_UNKNOWN: // fall through to RGB case PL_COLOR_SYSTEM_RGB: return PLANE_RGB; case PL_COLOR_SYSTEM_XYZ: return PLANE_XYZ; // For the switch completeness check case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: case PL_COLOR_SYSTEM_DOLBYVISION: case PL_COLOR_SYSTEM_YCGCO: case PL_COLOR_SYSTEM_COUNT: break; } pl_unreachable(); } static void fix_refs_and_rects(struct pass_state *pass) { struct pl_frame *image = &pass->image; struct pl_frame *target = &pass->target; // Find the ref planes for (int i = 0; i < image->num_planes; i++) { pass->src_type[i] = detect_plane_type(&image->planes[i], &image->repr); switch (pass->src_type[i]) { case PLANE_RGB: case PLANE_LUMA: case PLANE_XYZ: pass->src_ref = i; break; case PLANE_CHROMA: case PLANE_ALPHA: break; case PLANE_INVALID: pl_unreachable(); } } for (int i = 0; i < target->num_planes; i++) { pass->dst_type[i] = detect_plane_type(&target->planes[i], &target->repr); switch (pass->dst_type[i]) { case PLANE_RGB: case PLANE_LUMA: case PLANE_XYZ: pass->dst_ref = i; break; case PLANE_CHROMA: case PLANE_ALPHA: break; case PLANE_INVALID: pl_unreachable(); } } // Fix the rendering rects struct pl_rect2df *src = &image->crop, *dst = &target->crop; pl_tex src_ref = pass->image.planes[pass->src_ref].texture; pl_tex dst_ref = pass->target.planes[pass->dst_ref].texture; int dst_w = dst_ref->params.w, dst_h = dst_ref->params.h; if ((!src->x0 && !src->x1) || (!src->y0 && !src->y1)) { src->x1 = src_ref->params.w; src->y1 = src_ref->params.h; }; if ((!dst->x0 && !dst->x1) || (!dst->y0 && !dst->y1)) { dst->x1 = dst_w; dst->y1 = dst_h; } // Compute end-to-end rotation pass->rotation = pl_rotation_normalize(image->rotation - target->rotation); pl_rect2df_rotate(dst, -pass->rotation); // normalize by counter-rotating if (pass->rotation % PL_ROTATION_180 == PL_ROTATION_90) PL_SWAP(dst_w, dst_h); // Keep track of whether the end-to-end rendering is flipped 
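// (i.e. exactly one of the source and destination rects is reversed along a
// given axis; a flip on both sides cancels out)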
bool flipped_x = (src->x0 > src->x1) != (dst->x0 > dst->x1), flipped_y = (src->y0 > src->y1) != (dst->y0 > dst->y1); // Normalize both rects to make the math easier pl_rect2df_normalize(src); pl_rect2df_normalize(dst); // Round the output rect and clip it to the framebuffer dimensions float rx0 = roundf(PL_MAX(dst->x0, 0.0)), ry0 = roundf(PL_MAX(dst->y0, 0.0)), rx1 = roundf(PL_MIN(dst->x1, dst_w)), ry1 = roundf(PL_MIN(dst->y1, dst_h)); // Adjust the src rect corresponding to the rounded crop float scale_x = pl_rect_w(*src) / pl_rect_w(*dst), scale_y = pl_rect_h(*src) / pl_rect_h(*dst), base_x = src->x0, base_y = src->y0; src->x0 = base_x + (rx0 - dst->x0) * scale_x; src->x1 = base_x + (rx1 - dst->x0) * scale_x; src->y0 = base_y + (ry0 - dst->y0) * scale_y; src->y1 = base_y + (ry1 - dst->y0) * scale_y; // Update dst_rect to the rounded values and re-apply flip if needed. We // always do this in the `dst` rather than the `src`` because this allows // e.g. polar sampling compute shaders to work. *dst = (struct pl_rect2df) { .x0 = flipped_x ? rx1 : rx0, .y0 = flipped_y ? ry1 : ry0, .x1 = flipped_x ? rx0 : rx1, .y1 = flipped_y ? ry0 : ry1, }; // Copies of the above, for convenience pass->ref_rect = *src; pass->dst_rect = (struct pl_rect2d) { dst->x0, dst->y0, dst->x1, dst->y1, }; } static pl_tex frame_ref(const struct pl_frame *frame) { pl_assert(frame->num_planes); for (int i = 0; i < frame->num_planes; i++) { switch (detect_plane_type(&frame->planes[i], &frame->repr)) { case PLANE_RGB: case PLANE_LUMA: case PLANE_XYZ: return frame->planes[i].texture; case PLANE_CHROMA: case PLANE_ALPHA: continue; case PLANE_INVALID: pl_unreachable(); } } return frame->planes[0].texture; } static void fix_color_space(struct pl_frame *frame) { pl_tex tex = frame_ref(frame); // If the primaries are not known, guess them based on the resolution if (!frame->color.primaries) frame->color.primaries = pl_color_primaries_guess(tex->params.w, tex->params.h); pl_color_space_infer(&frame->color); // For UNORM formats, we can infer the sampled bit depth from the texture // itself. This is ignored for other format types, because the logic // doesn't really work out for them anyways, and it's best not to do // anything too crazy unless the user provides explicit details. struct pl_bit_encoding *bits = &frame->repr.bits; if (!bits->sample_depth && tex->params.format->type == PL_FMT_UNORM) { // Just assume the first component's depth is canonical. This works in // practice, since for cases like rgb565 we want to use the lower depth // anyway. Plus, every format has at least one component. bits->sample_depth = tex->params.format->component_depth[0]; // If we don't know the color depth, assume it spans the full range of // the texture. Otherwise, clamp it to the texture depth. bits->color_depth = PL_DEF(bits->color_depth, bits->sample_depth); bits->color_depth = PL_MIN(bits->color_depth, bits->sample_depth); // If the texture depth is higher than the known color depth, assume // the colors were left-shifted. 
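// (e.g. 10-bit color stored in a 16-bit UNORM texture yields a bit shift of 6)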
bits->bit_shift += bits->sample_depth - bits->color_depth; } } static bool pass_infer_state(struct pass_state *pass) { struct pl_frame *image = &pass->image; struct pl_frame *target = &pass->target; if (!validate_structs(pass->rr, image, target)) return false; fix_refs_and_rects(pass); fix_color_space(image); // Infer the target color space info based on the image's pl_color_space_infer_ref(&target->color, &image->color); fix_color_space(target); // Detect the presence of an alpha channel in the frames and explicitly // default the alpha mode in this case, so we can use it to detect whether // or not to strip the alpha channel during rendering. // // Note the different defaults for the image and target, because files // are usually independent but windowing systems usually expect // premultiplied. (We also premultiply for internal rendering, so this // way of doing it avoids a possible division-by-zero path!) if (!image->repr.alpha) { for (int i = 0; i < image->num_planes; i++) { const struct pl_plane *plane = &image->planes[i]; for (int c = 0; c < plane->components; c++) { if (plane->component_mapping[c] == PL_CHANNEL_A) image->repr.alpha = PL_ALPHA_INDEPENDENT; } } } if (!target->repr.alpha) { for (int i = 0; i < target->num_planes; i++) { const struct pl_plane *plane = &target->planes[i]; for (int c = 0; c < plane->components; c++) { if (plane->component_mapping[c] == PL_CHANNEL_A) target->repr.alpha = PL_ALPHA_PREMULTIPLIED; } } } return true; } static bool draw_empty_overlays(pl_renderer rr, const struct pl_frame *ptarget, const struct pl_render_params *params) { if (!params->skip_target_clearing) pl_frame_clear_rgba(rr->gpu, ptarget, CLEAR_COL(params)); if (!ptarget->num_overlays) return true; struct pass_state pass = { .rr = rr, .params = params, .target = *ptarget, .info.stage = PL_RENDER_STAGE_FRAME, }; struct pl_frame *target = &pass.target; require(target->num_planes > 0 && target->num_planes <= PL_MAX_PLANES); for (int i = 0; i < target->num_planes; i++) validate_plane(target->planes[i], renderable); require(target->num_overlays >= 0); for (int i = 0; i < target->num_overlays; i++) validate_overlay(target->overlays[i]); fix_color_space(target); pl_dispatch_callback(rr->dp, &pass, info_callback); pl_dispatch_reset_frame(rr->dp); pl_tex ref = frame_ref(target); for (int p = 0; p < target->num_planes; p++) { const struct pl_plane *plane = &target->planes[p]; // Math replicated from `pass_output_target` float rx = (float) plane->texture->params.w / ref->params.w, ry = (float) plane->texture->params.h / ref->params.h; float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx), rry = ry >= 1 ? 
roundf(ry) : 1.0 / roundf(1.0 / ry); float sx = plane->shift_x, sy = plane->shift_y; struct pl_transform2x2 tscale = { .mat = {{{ rrx, 0.0 }, { 0.0, rry }}}, .c = { -sx, -sy }, }; draw_overlays(&pass, plane->texture, plane->components, plane->component_mapping, target->overlays, target->num_overlays, target->color, target->repr, false, &tscale); } return true; } bool pl_render_image(pl_renderer rr, const struct pl_frame *pimage, const struct pl_frame *ptarget, const struct pl_render_params *params) { params = PL_DEF(params, &pl_render_default_params); pl_dispatch_mark_dynamic(rr->dp, params->dynamic_constants); if (!pimage) return draw_empty_overlays(rr, ptarget, params); struct pass_state pass = { .rr = rr, .params = params, .image = *pimage, .target = *ptarget, .info.stage = PL_RENDER_STAGE_FRAME, }; if (!pass_infer_state(&pass)) return false; pass.tmp = pl_tmp(NULL), pass.fbos_used = pl_calloc(pass.tmp, rr->fbos.num, sizeof(bool)); pl_dispatch_callback(rr->dp, &pass, info_callback); pl_dispatch_reset_frame(rr->dp); for (int i = 0; i < params->num_hooks; i++) { if (params->hooks[i]->reset) params->hooks[i]->reset(params->hooks[i]->priv); } if (!pass_read_image(&pass)) goto error; if (!pass_scale_main(&pass)) goto error; if (!pass_output_target(&pass)) goto error; pl_free(pass.tmp); return true; error: pl_dispatch_abort(rr->dp, &pass.img.sh); pl_free(pass.tmp); PL_ERR(rr, "Failed rendering image!"); return false; } static uint64_t render_params_hash(const struct pl_render_params *params_orig) { struct pl_render_params params = *params_orig; uint64_t hash = 0; #define HASH_PTR(ptr) \ do { \ if (ptr) { \ pl_hash_merge(&hash, pl_mem_hash(ptr, sizeof(*ptr))); \ ptr = NULL; \ } \ } while (0) #define HASH_FILTER(scaler) \ do { \ if (scaler) { \ struct pl_filter_config filter = *scaler; \ HASH_PTR(filter.kernel); \ HASH_PTR(filter.window); \ pl_hash_merge(&hash, pl_mem_hash(&filter, sizeof(filter))); \ scaler = NULL; \ } \ } while (0) HASH_FILTER(params.upscaler); HASH_FILTER(params.downscaler); HASH_PTR(params.deband_params); HASH_PTR(params.sigmoid_params); HASH_PTR(params.color_adjustment); HASH_PTR(params.peak_detect_params); HASH_PTR(params.color_map_params); HASH_PTR(params.dither_params); // Hash all hooks for (int i = 0; i < params.num_hooks; i++) { const struct pl_hook *hook = params.hooks[i]; if (hook->stages == PL_HOOK_OUTPUT) continue; // ignore hooks only relevant to pass_output_target pl_hash_merge(&hash, pl_mem_hash(hook, sizeof(*hook))); } params.hooks = NULL; // Hash the LUT by only looking at the signature if (params.lut) { pl_hash_merge(&hash, params.lut->signature); params.lut = NULL; } #define CLEAR(field) field = (__typeof__(field)) {0} // Clear out fields only relevant to pl_render_image_mix CLEAR(params.frame_mixer); CLEAR(params.preserve_mixing_cache); CLEAR(params.skip_caching_single_frame); memset(params.background_color, 0, sizeof(params.background_color)); CLEAR(params.background_transparency); CLEAR(params.skip_target_clearing); CLEAR(params.blend_against_tiles); memset(params.tile_colors, 0, sizeof(params.tile_colors)); CLEAR(params.tile_size); // Clear out fields only relevant to pass_output_target CLEAR(params.blend_params); CLEAR(params.cone_params); CLEAR(params.icc_params); CLEAR(params.lut3d_params); CLEAR(params.ignore_icc_profiles); CLEAR(params.force_icc_lut); CLEAR(params.force_3dlut); CLEAR(params.force_dither); CLEAR(params.dynamic_constants); CLEAR(params.allow_delayed_peak_detect); pl_hash_merge(&hash, pl_mem_hash(¶ms, sizeof(params))); return hash; } 
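// Frame mixing notes: `pl_frame_mix.timestamps` are interpreted relative to
// the target vsync, in the same (arbitrary but consistent) units as
// `vsync_duration`: 0.0 marks the start of the current vsync and
// `vsync_duration` the start of the next one. Frames must be supplied in
// ascending timestamp order, and the frame closest to 0.0 acts as the
// reference.
//
// A minimal usage sketch (illustrative only; the frame pointers, signatures
// and timestamps below are hypothetical):
//
//     struct pl_frame_mix mix = {
//         .num_frames     = 2,
//         .frames         = (const struct pl_frame *[]) { &prev_frame, &next_frame },
//         .signatures     = (uint64_t[]) { 41, 42 },
//         .timestamps     = (float[]) { -0.4, 0.6 },
//         .vsync_duration = 1.0,
//     };
//     pl_render_image_mix(rr, &mix, &target_frame, &pl_render_default_params);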
#define MAX_MIX_FRAMES 16 bool pl_render_image_mix(pl_renderer rr, const struct pl_frame_mix *images, const struct pl_frame *ptarget, const struct pl_render_params *params) { if (!images->num_frames) return pl_render_image(rr, NULL, ptarget, params); params = PL_DEF(params, &pl_render_default_params); uint64_t params_hash = render_params_hash(params); pl_dispatch_mark_dynamic(rr->dp, params->dynamic_constants); require(images->num_frames >= 1); for (int i = 0; i < images->num_frames - 1; i++) require(images->timestamps[i] <= images->timestamps[i+1]); // As the canonical reference, find the nearest neighbour frame const struct pl_frame *refimg = images->frames[0]; float best = fabsf(images->timestamps[0]); for (int i = 1; i < images->num_frames; i++) { float dist = fabsf(images->timestamps[i]); if (dist < best) { refimg = images->frames[i]; best = dist; continue; } else { break; } } struct pass_state pass = { .rr = rr, .params = params, .image = *refimg, .target = *ptarget, .info.stage = PL_RENDER_STAGE_BLEND, }; if (rr->disable_mixing || !FBOFMT(4)) goto fallback; if (!pass_infer_state(&pass)) return false; int out_w = abs(pl_rect_w(pass.dst_rect)), out_h = abs(pl_rect_h(pass.dst_rect)); int fidx = 0; struct cached_frame frames[MAX_MIX_FRAMES]; float weights[MAX_MIX_FRAMES]; float wsum = 0.0; pass.tmp = pl_tmp(NULL); // Garbage collect the cache by evicting all frames from the cache that are // not determined to still be required for (int i = 0; i < rr->frames.num; i++) rr->frames.elem[i].evict = true; // Traverse the input frames and determine/prepare the ones we need for (int i = 0; i < images->num_frames; i++) { uint64_t sig = images->signatures[i]; float pts = images->timestamps[i]; PL_TRACE(rr, "Considering image with signature 0x%llx, pts %f", (unsigned long long) sig, pts); // Combining images with different rotations is basically unfeasible if (pl_rotation_normalize(images->frames[i]->rotation - refimg->rotation)) { PL_TRACE(rr, " -> Skipping: incompatible rotation"); continue; } float weight; const struct pl_filter_config *mixer = params->frame_mixer; bool single_frame = !mixer || images->num_frames == 1; if (single_frame) { // Only render the refimg, ignore others if (images->frames[i] == refimg) { weight = 1.0; } else { PL_TRACE(rr, " -> Skipping: no frame mixer"); continue; } // For backwards compatibility, treat !kernel as oversample } else if (!mixer->kernel || mixer->kernel->weight == oversample) { // Compute the visible interval [pts, end] of this frame float end = i+1 < images->num_frames ? 
images->timestamps[i+1] : INFINITY; if (pts > images->vsync_duration || end < 0.0) { PL_TRACE(rr, " -> Skipping: no intersection with vsync"); continue; } else { pts = PL_MAX(pts, 0.0); end = PL_MIN(end, images->vsync_duration); pl_assert(end >= pts); } // Weight is the fraction of vsync interval that frame is visible weight = (end - pts) / images->vsync_duration; PL_TRACE(rr, " -> Frame [%f, %f] intersects [%f, %f] = weight %f", pts, end, 0.0, images->vsync_duration, weight); if (weight < mixer->kernel->params[0]) { PL_TRACE(rr, " (culling due to threshold)"); weight = 0.0; } } else { if (fabsf(pts) >= mixer->kernel->radius) { PL_TRACE(rr, " -> Skipping: outside filter radius (%f)", mixer->kernel->radius); continue; } // Weight is directly sampled from the filter weight = pl_filter_sample(mixer, pts); PL_TRACE(rr, " -> Filter offset %f = weight %f", pts, weight); } struct cached_frame *f = NULL; for (int j = 0; j < rr->frames.num; j++) { if (rr->frames.elem[j].signature == sig) { f = &rr->frames.elem[j]; f->evict = false; break; } } // Skip frames with negligible contributions. Do this after the loop // above to make sure these frames don't get evicted just yet, and // also exclude the reference image from this optimization to ensure // that we always have at least one frame. const float cutoff = 1e-3; if (fabsf(weight) <= cutoff && images->frames[i] != refimg) { PL_TRACE(rr, " -> Skipping: weight (%f) below threshold (%f)", weight, cutoff); continue; } bool skip_cache = single_frame && params->skip_caching_single_frame; if (!f && skip_cache) { PL_TRACE(rr, "Single frame not found in cache, bypassing"); goto fallback; } if (!f) { // Signature does not exist in the cache at all yet, // so grow the cache by this entry. PL_ARRAY_GROW(rr, rr->frames); f = &rr->frames.elem[rr->frames.num++]; *f = (struct cached_frame) { .signature = sig, .color = images->frames[i]->color, .profile = images->frames[i]->profile, }; } // Check to see if we can blindly reuse this cache entry. This is the // case if either the params are compatible, or the user doesn't care bool strict_reuse = !params->preserve_mixing_cache || skip_cache; bool can_reuse = f->tex; if (can_reuse && strict_reuse) { can_reuse = f->tex->params.w == out_w && f->tex->params.h == out_h && f->params_hash == params_hash; } if (!can_reuse && skip_cache) { PL_TRACE(rr, "Single frame cache entry invalid, bypassing"); goto fallback; } if (!can_reuse) { // If we can't reuse the entry, we need to re-render this frame PL_TRACE(rr, " -> Cached texture missing or invalid.. (re)creating"); if (!f->tex) { if (PL_ARRAY_POP(rr->frame_fbos, &f->tex)) pl_tex_invalidate(rr->gpu, f->tex); } bool ok = pl_tex_recreate(rr->gpu, &f->tex, pl_tex_params( .w = out_w, .h = out_h, .format = rr->fbofmt[4], .sampleable = true, .renderable = true, .blit_dst = rr->fbofmt[4]->caps & PL_FMT_CAP_BLITTABLE, .storable = rr->fbofmt[4]->caps & PL_FMT_CAP_STORABLE, )); if (!ok) { PL_ERR(rr, "Could not create intermediate texture for " "frame mixing.. 
disabling!"); rr->disable_mixing = true; goto fallback; } struct pass_state inter_pass = { .rr = rr, .tmp = pass.tmp, .params = pass.params, .fbos_used = pl_calloc(pass.tmp, rr->fbos.num, sizeof(bool)), .image = *images->frames[i], .target = *ptarget, .info.stage = PL_RENDER_STAGE_FRAME, }; // Render a single frame up to `pass_output_target` if (!pass_infer_state(&inter_pass)) goto error; pl_dispatch_callback(rr->dp, &inter_pass, info_callback); pl_dispatch_reset_frame(rr->dp); for (int n = 0; n < params->num_hooks; n++) { if (params->hooks[n]->reset) params->hooks[n]->reset(params->hooks[n]->priv); } if (!pass_read_image(&inter_pass)) goto error; if (!pass_scale_main(&inter_pass)) goto error; pl_assert(inter_pass.img.w == out_w && inter_pass.img.h == out_h); if (inter_pass.img.tex) { struct pl_tex_blit_params blit = { .src = inter_pass.img.tex, .dst = f->tex, }; if (blit.src->params.blit_src && blit.dst->params.blit_dst) { pl_tex_blit(rr->gpu, &blit); } else { pl_tex_blit_raster(rr->gpu, rr->dp, &blit); } } else { ok = pl_dispatch_finish(rr->dp, pl_dispatch_params( .shader = &inter_pass.img.sh, .target = f->tex, )); if (!ok) goto error; } f->params_hash = params_hash; f->color = inter_pass.img.color; f->comps = inter_pass.img.comps; pl_assert(inter_pass.img.repr.alpha != PL_ALPHA_INDEPENDENT); } pl_assert(fidx < MAX_MIX_FRAMES); frames[fidx] = *f; weights[fidx] = weight; wsum += weight; fidx++; } // Evict the frames we *don't* need for (int i = 0; i < rr->frames.num; ) { if (rr->frames.elem[i].evict) { PL_TRACE(rr, "Evicting frame with signature %llx from cache", (unsigned long long) rr->frames.elem[i].signature); PL_ARRAY_APPEND(rr, rr->frame_fbos, rr->frames.elem[i].tex); PL_ARRAY_REMOVE_AT(rr->frames, i); continue; } else { i++; } } // Sample and mix the output color pl_dispatch_callback(rr->dp, &pass, info_callback); pl_dispatch_reset_frame(rr->dp); pass.info.index = fidx; pl_shader sh = pl_dispatch_begin(rr->dp); sh_describe(sh, "frame mixing"); sh->res.output = PL_SHADER_SIG_COLOR; sh->output_w = out_w; sh->output_h = out_h; GLSL("vec4 color; \n" "// pl_render_image_mix \n" "{ \n" "vec4 mix_color = vec4(0.0); \n"); // Mix in the image color space, but using the transfer function of // (arbitrarily) the latest rendered frame. This avoids unnecessary ping // ponging between linear and nonlinear light when combining linearly // scaled images with frame mixing. struct pl_color_space mix_color = pass.image.color; pl_assert(fidx > 0); mix_color.transfer = frames[fidx - 1].color.transfer; int comps = 0; for (int i = 0; i < fidx; i++) { const struct pl_tex_params *tpars = &frames[i].tex->params; // Use linear sampling if desired and possible enum pl_tex_sample_mode sample_mode = PL_TEX_SAMPLE_NEAREST; if ((tpars->w != out_w || tpars->h != out_h) && (tpars->format->caps & PL_FMT_CAP_LINEAR)) { sample_mode = PL_TEX_SAMPLE_LINEAR; } ident_t pos, tex = sh_bind(sh, frames[i].tex, PL_TEX_ADDRESS_CLAMP, sample_mode, "frame", NULL, &pos, NULL, NULL); GLSL("color = %s(%s, %s); \n", sh_tex_fn(sh, *tpars), tex, pos); // Note: This ignores differences in ICC profile, which we decide to // just simply not care about. Doing that properly would require // converting between different image profiles, and the headache of // finagling that state is just not worth it because this is an // exceptionally unlikely hypothetical. 
pl_shader_color_map(sh, NULL, frames[i].color, mix_color, NULL, false); ident_t weight = "1.0"; if (weights[i] != wsum) { // skip loading weight for nearest neighbour weight = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("weight"), .data = &(float){ weights[i] / wsum }, .dynamic = true, }); } GLSL("mix_color += %s * color; \n", weight); comps = PL_MAX(comps, frames[i].comps); } GLSL("color = mix_color; \n" "} \n"); // Dispatch this to the destination pass.fbos_used = pl_calloc(pass.tmp, rr->fbos.num, sizeof(bool)); pass.img = (struct img) { .sh = sh, .w = out_w, .h = out_h, .comps = comps, .color = mix_color, .repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_PC, .alpha = comps >= 4 ? PL_ALPHA_PREMULTIPLIED : PL_ALPHA_UNKNOWN, }, }; for (int i = 0; i < params->num_hooks; i++) { if (params->hooks[i]->reset) params->hooks[i]->reset(params->hooks[i]->priv); } if (!pass_output_target(&pass)) goto fallback; pl_free(pass.tmp); return true; error: PL_ERR(rr, "Could not render image for frame mixing.. disabling!"); rr->disable_mixing = true; // fall through fallback: pl_free(pass.tmp); return pl_render_image(rr, refimg, ptarget, params); } void pl_frame_set_chroma_location(struct pl_frame *frame, enum pl_chroma_location chroma_loc) { pl_tex ref = frame_ref(frame); if (ref) { // Texture dimensions are already known, so apply the chroma location // only to subsampled planes int ref_w = ref->params.w, ref_h = ref->params.h; for (int i = 0; i < frame->num_planes; i++) { struct pl_plane *plane = &frame->planes[i]; pl_tex tex = plane->texture; bool subsampled = tex->params.w < ref_w || tex->params.h < ref_h; if (subsampled) pl_chroma_location_offset(chroma_loc, &plane->shift_x, &plane->shift_y); } } else { // Texture dimensions are not yet known, so apply the chroma location // to all chroma planes, regardless of subsampling for (int i = 0; i < frame->num_planes; i++) { struct pl_plane *plane = &frame->planes[i]; if (detect_plane_type(plane, &frame->repr) == PLANE_CHROMA) pl_chroma_location_offset(chroma_loc, &plane->shift_x, &plane->shift_y); } } } void pl_frame_from_swapchain(struct pl_frame *out_frame, const struct pl_swapchain_frame *frame) { pl_tex fbo = frame->fbo; int num_comps = fbo->params.format->num_components; if (!frame->color_repr.alpha) num_comps = PL_MIN(num_comps, 3); *out_frame = (struct pl_frame) { .num_planes = 1, .planes = {{ .texture = fbo, .components = num_comps, .component_mapping = {0, 1, 2, 3}, }}, .crop = { 0, 0, fbo->params.w, fbo->params.h }, .repr = frame->color_repr, .color = frame->color_space, }; if (frame->flipped) PL_SWAP(out_frame->crop.y0, out_frame->crop.y1); } bool pl_frame_is_cropped(const struct pl_frame *frame) { int x0 = roundf(PL_MIN(frame->crop.x0, frame->crop.x1)), y0 = roundf(PL_MIN(frame->crop.y0, frame->crop.y1)), x1 = roundf(PL_MAX(frame->crop.x0, frame->crop.x1)), y1 = roundf(PL_MAX(frame->crop.y0, frame->crop.y1)); pl_tex ref = frame_ref(frame); pl_assert(ref); if (!x0 && !x1) x1 = ref->params.w; if (!y0 && !y1) y1 = ref->params.h; return x0 > 0 || y0 > 0 || x1 < ref->params.w || y1 < ref->params.h; } void pl_frame_clear_rgba(pl_gpu gpu, const struct pl_frame *frame, const float rgba[4]) { struct pl_color_repr repr = frame->repr; struct pl_transform3x3 tr = pl_color_repr_decode(&repr, NULL); pl_transform3x3_invert(&tr); float encoded[3] = { rgba[0], rgba[1], rgba[2] }; pl_transform3x3_apply(&tr, encoded); float mult = frame->repr.alpha == PL_ALPHA_PREMULTIPLIED ? 
rgba[3] : 1.0; for (int p = 0; p < frame->num_planes; p++) { const struct pl_plane *plane = &frame->planes[p]; float clear[4] = { 0.0, 0.0, 0.0, rgba[3] }; for (int c = 0; c < plane->components; c++) { int ch = plane->component_mapping[c]; if (ch >= 0 && ch < 3) clear[c] = mult * encoded[plane->component_mapping[c]]; } pl_tex_clear(gpu, plane->texture, clear); } } libplacebo-v4.192.1/src/shaders.c000066400000000000000000001154601417677245700165710ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "common.h" #include "log.h" #include "shaders.h" pl_shader pl_shader_alloc(pl_log log, const struct pl_shader_params *params) { pl_shader sh = pl_alloc_ptr(NULL, sh); *sh = (struct pl_shader) { .log = log, .mutable = true, }; // Ensure there's always at least one `tmp` object PL_ARRAY_APPEND(sh, sh->tmp, pl_ref_new(NULL)); if (params) sh->res.params = *params; return sh; } void pl_shader_free(pl_shader *psh) { pl_shader sh = *psh; if (!sh) return; for (int i = 0; i < sh->tmp.num; i++) pl_ref_deref(&sh->tmp.elem[i]); pl_free_ptr(psh); } void pl_shader_reset(pl_shader sh, const struct pl_shader_params *params) { for (int i = 0; i < sh->tmp.num; i++) pl_ref_deref(&sh->tmp.elem[i]); struct pl_shader new = { .log = sh->log, .mutable = true, // Preserve array allocations .tmp.elem = sh->tmp.elem, .vas.elem = sh->vas.elem, .vars.elem = sh->vars.elem, .descs.elem = sh->descs.elem, .consts.elem = sh->consts.elem, .steps.elem = sh->steps.elem, }; if (params) new.res.params = *params; // Preserve buffer allocations for (int i = 0; i < PL_ARRAY_SIZE(new.buffers); i++) new.buffers[i] = (pl_str) { .buf = sh->buffers[i].buf }; *sh = new; PL_ARRAY_APPEND(sh, sh->tmp, pl_ref_new(NULL)); } bool pl_shader_is_failed(const pl_shader sh) { return sh->failed; } struct pl_glsl_version sh_glsl(const pl_shader sh) { if (SH_PARAMS(sh).glsl.version) return SH_PARAMS(sh).glsl; if (SH_GPU(sh)) return SH_GPU(sh)->glsl; return (struct pl_glsl_version) { .version = 130 }; } bool sh_try_compute(pl_shader sh, int bw, int bh, bool flex, size_t mem) { pl_assert(bw && bh); int *sh_bw = &sh->res.compute_group_size[0]; int *sh_bh = &sh->res.compute_group_size[1]; struct pl_glsl_version glsl = sh_glsl(sh); if (!glsl.compute) { PL_TRACE(sh, "Disabling compute shader due to missing `compute` support"); return false; } if (sh->res.compute_shmem + mem > glsl.max_shmem_size) { PL_TRACE(sh, "Disabling compute shader due to insufficient shmem"); return false; } if (sh->type == SH_FRAGMENT) { PL_TRACE(sh, "Disabling compute shader because shader is already marked " "as fragment shader"); return false; } if (bw > glsl.max_group_size[0] || bh > glsl.max_group_size[1] || (bw * bh) > glsl.max_group_threads) { if (!flex) { PL_TRACE(sh, "Disabling compute shader due to exceeded group " "thread count."); return false; } else { // Pick better group sizes bw = PL_MIN(bw, glsl.max_group_size[0]); bh = 
glsl.max_group_threads / bw; } } sh->res.compute_shmem += mem; // If the current shader is either not a compute shader, or we have no // choice but to override the metadata, always do so if (sh->type != SH_COMPUTE || (sh->flexible_work_groups && !flex)) { *sh_bw = bw; *sh_bh = bh; sh->type = SH_COMPUTE; return true; } // If both shaders are flexible, pick the larger of the two if (sh->flexible_work_groups && flex) { *sh_bw = PL_MAX(*sh_bw, bw); *sh_bh = PL_MAX(*sh_bh, bh); pl_assert(*sh_bw * *sh_bh <= glsl.max_group_threads); return true; } // If the other shader is rigid but this is flexible, change nothing if (flex) return true; // If neither are flexible, make sure the parameters match pl_assert(!flex && !sh->flexible_work_groups); if (bw != *sh_bw || bh != *sh_bh) { PL_TRACE(sh, "Disabling compute shader due to incompatible group " "sizes %dx%d and %dx%d", *sh_bw, *sh_bh, bw, bh); sh->res.compute_shmem -= mem; return false; } return true; } bool pl_shader_is_compute(const pl_shader sh) { return sh->type == SH_COMPUTE; } bool pl_shader_output_size(const pl_shader sh, int *w, int *h) { if (!sh->output_w || !sh->output_h) return false; *w = sh->transpose ? sh->output_h : sh->output_w; *h = sh->transpose ? sh->output_w : sh->output_h; return true; } ident_t sh_fresh(pl_shader sh, const char *name) { return pl_asprintf(SH_TMP(sh), "_%s_%d_%u", PL_DEF(name, "var"), sh->fresh++, SH_PARAMS(sh).id); } ident_t sh_var(pl_shader sh, struct pl_shader_var sv) { sv.var.name = sh_fresh(sh, sv.var.name); sv.data = pl_memdup(SH_TMP(sh), sv.data, pl_var_host_layout(0, &sv.var).size); PL_ARRAY_APPEND(sh, sh->vars, sv); return (ident_t) sv.var.name; } static void merge_access(enum pl_desc_access *a, enum pl_desc_access b) { if (*a != b) *a = PL_DESC_ACCESS_READWRITE; } ident_t sh_desc(pl_shader sh, struct pl_shader_desc sd) { switch (sd.desc.type) { case PL_DESC_BUF_UNIFORM: case PL_DESC_BUF_STORAGE: case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: // Skip re-attaching the same buffer desc twice // FIXME: define aliases if the variable names differ for (int i = 0; i < sh->descs.num; i++) { if (sh->descs.elem[i].binding.object == sd.binding.object) { merge_access(&sh->descs.elem[i].desc.access, sd.desc.access); sh->descs.elem[i].memory |= sd.memory; return (ident_t) sh->descs.elem[i].desc.name; } } size_t bsize = sizeof(sd.buffer_vars[0]) * sd.num_buffer_vars; sd.buffer_vars = pl_memdup(SH_TMP(sh), sd.buffer_vars, bsize); break; case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: pl_assert(!sd.num_buffer_vars); break; case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } sd.desc.name = sh_fresh(sh, sd.desc.name); PL_ARRAY_APPEND(sh, sh->descs, sd); return (ident_t) sd.desc.name; } ident_t sh_const(pl_shader sh, struct pl_shader_const sc) { if (sh->res.params.dynamic_constants && !sc.compile_time) { return sh_var(sh, (struct pl_shader_var) { .var = { .name = sc.name, .type = sc.type, .dim_v = 1, .dim_m = 1, .dim_a = 1, }, .data = sc.data, }); } sc.name = sh_fresh(sh, sc.name); pl_gpu gpu = SH_GPU(sh); if (gpu && gpu->limits.max_constants) { sc.data = pl_memdup(SH_TMP(sh), sc.data, pl_var_type_size(sc.type)); PL_ARRAY_APPEND(sh, sh->consts, sc); return (ident_t) sc.name; } // Fallback for GPUs without specialization constants switch (sc.type) { case PL_VAR_SINT: GLSLH("const int %s = %d; \n", sc.name, *(int *) sc.data); return (ident_t) sc.name; case PL_VAR_UINT: GLSLH("const uint %s = %uu; \n", sc.name, *(unsigned int *) sc.data); return (ident_t) sc.name; case PL_VAR_FLOAT: 
GLSLH("const float %s = %f; \n", sc.name, *(float *) sc.data); return (ident_t) sc.name; case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: break; } pl_unreachable(); } ident_t sh_const_int(pl_shader sh, const char *name, int val) { return sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_SINT, .name = name, .data = &val, }); } ident_t sh_const_uint(pl_shader sh, const char *name, unsigned int val) { return sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_UINT, .name = name, .data = &val, }); } ident_t sh_const_float(pl_shader sh, const char *name, float val) { return sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_FLOAT, .name = name, .data = &val, }); } ident_t sh_attr_vec2(pl_shader sh, const char *name, const struct pl_rect2df *rc) { pl_gpu gpu = SH_GPU(sh); if (!gpu) { SH_FAIL(sh, "Failed adding vertex attr '%s': No GPU available!", name); return NULL; } pl_fmt fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2); if (!fmt) { SH_FAIL(sh, "Failed adding vertex attr '%s': no vertex fmt!", name); return NULL; } float vals[4][2] = { { rc->x0, rc->y0 }, { rc->x1, rc->y0 }, { rc->x0, rc->y1 }, { rc->x1, rc->y1 }, }; float *data = pl_memdup(SH_TMP(sh), &vals[0][0], sizeof(vals)); struct pl_shader_va va = { .attr = { .name = sh_fresh(sh, name), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), }, .data = { &data[0], &data[2], &data[4], &data[6] }, }; PL_ARRAY_APPEND(sh, sh->vas, va); return (ident_t) va.attr.name; } ident_t sh_bind(pl_shader sh, pl_tex tex, enum pl_tex_address_mode address_mode, enum pl_tex_sample_mode sample_mode, const char *name, const struct pl_rect2df *rect, ident_t *out_pos, ident_t *out_size, ident_t *out_pt) { if (pl_tex_params_dimension(tex->params) != 2) { SH_FAIL(sh, "Failed binding texture '%s': not a 2D texture!", name); return NULL; } if (!tex->params.sampleable) { SH_FAIL(sh, "Failed binding texture '%s': texture not sampleable!", name); return NULL; } ident_t itex = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = name, .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = tex, .address_mode = address_mode, .sample_mode = sample_mode, }, }); float sx, sy; if (tex->sampler_type == PL_SAMPLER_RECT) { sx = 1.0; sy = 1.0; } else { sx = 1.0 / tex->params.w; sy = 1.0 / tex->params.h; } if (out_pos) { struct pl_rect2df full = { .x1 = tex->params.w, .y1 = tex->params.h, }; rect = PL_DEF(rect, &full); *out_pos = sh_attr_vec2(sh, "tex_coord", &(struct pl_rect2df) { .x0 = sx * rect->x0, .y0 = sy * rect->y0, .x1 = sx * rect->x1, .y1 = sy * rect->y1, }); } if (out_size) { *out_size = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_size"), .data = &(float[2]) {tex->params.w, tex->params.h}, }); } if (out_pt) { *out_pt = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_pt"), .data = &(float[2]) {sx, sy}, }); } return itex; } bool sh_buf_desc_append(void *alloc, pl_gpu gpu, struct pl_shader_desc *buf_desc, struct pl_var_layout *out_layout, const struct pl_var new_var) { struct pl_buffer_var bv = { .var = new_var }; size_t cur_size = sh_buf_desc_size(buf_desc); switch (buf_desc->desc.type) { case PL_DESC_BUF_UNIFORM: bv.layout = pl_std140_layout(cur_size, &new_var); if (bv.layout.offset + bv.layout.size > gpu->limits.max_ubo_size) return false; break; case PL_DESC_BUF_STORAGE: bv.layout = pl_std430_layout(cur_size, &new_var); if (bv.layout.offset + bv.layout.size > gpu->limits.max_ssbo_size) return false; break; case PL_DESC_INVALID: case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: case PL_DESC_BUF_TEXEL_UNIFORM: case 
PL_DESC_BUF_TEXEL_STORAGE: case PL_DESC_TYPE_COUNT: pl_unreachable(); } if (out_layout) *out_layout = bv.layout; PL_ARRAY_APPEND_RAW(alloc, buf_desc->buffer_vars, buf_desc->num_buffer_vars, bv); return true; } size_t sh_buf_desc_size(const struct pl_shader_desc *buf_desc) { if (!buf_desc->num_buffer_vars) return 0; const struct pl_buffer_var *last; last = &buf_desc->buffer_vars[buf_desc->num_buffer_vars - 1]; return last->layout.offset + last->layout.size; } void sh_append(pl_shader sh, enum pl_shader_buf buf, const char *fmt, ...) { pl_assert(buf >= 0 && buf < SH_BUF_COUNT); va_list ap; va_start(ap, fmt); pl_str_append_vasprintf_c(sh, &sh->buffers[buf], fmt, ap); va_end(ap); } void sh_append_str(pl_shader sh, enum pl_shader_buf buf, pl_str str) { pl_assert(buf >= 0 && buf < SH_BUF_COUNT); pl_str_append(sh, &sh->buffers[buf], str); } static const char *insigs[] = { [PL_SHADER_SIG_NONE] = "", [PL_SHADER_SIG_COLOR] = "vec4 color", }; static const char *outsigs[] = { [PL_SHADER_SIG_NONE] = "void", [PL_SHADER_SIG_COLOR] = "vec4", }; static const char *retvals[] = { [PL_SHADER_SIG_NONE] = "", [PL_SHADER_SIG_COLOR] = "return color;", }; // libplacebo currently only allows 2D samplers for shader signatures static const char *samplers2D[] = { [PL_SAMPLER_NORMAL] = "sampler2D", [PL_SAMPLER_RECT] = "sampler2DRect", [PL_SAMPLER_EXTERNAL] = "samplerExternalOES", }; ident_t sh_subpass(pl_shader sh, const pl_shader sub) { pl_assert(sh->mutable); if (SH_PARAMS(sh).id == SH_PARAMS(sub).id) { PL_TRACE(sh, "Can't merge shaders: conflicting identifiers!"); return NULL; } // Check for shader compatibility int res_w = PL_DEF(sh->output_w, sub->output_w), res_h = PL_DEF(sh->output_h, sub->output_h); if ((sub->output_w && res_w != sub->output_w) || (sub->output_h && res_h != sub->output_h)) { PL_TRACE(sh, "Can't merge shaders: incompatible sizes: %dx%d and %dx%d", sh->output_w, sh->output_h, sub->output_w, sub->output_h); return NULL; } if (sub->type == SH_COMPUTE) { int subw = sub->res.compute_group_size[0], subh = sub->res.compute_group_size[1]; bool flex = sub->flexible_work_groups; if (!sh_try_compute(sh, subw, subh, flex, sub->res.compute_shmem)) { PL_TRACE(sh, "Can't merge shaders: incompatible block sizes or " "exceeded shared memory resource capabilities"); return NULL; } } sh->output_w = res_w; sh->output_h = res_h; // Append the prelude and header pl_str_append(sh, &sh->buffers[SH_BUF_PRELUDE], sub->buffers[SH_BUF_PRELUDE]); pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_HEADER]); // Append the body as a new header function ident_t name = sh_fresh(sh, "sub"); if (sub->res.input == PL_SHADER_SIG_SAMPLER) { pl_assert(sub->sampler_prefix); GLSLH("%s %s(%c%s src_tex, vec2 tex_coord) {\n", outsigs[sub->res.output], name, sub->sampler_prefix, samplers2D[sub->sampler_type]); } else { GLSLH("%s %s(%s) {\n", outsigs[sub->res.output], name, insigs[sub->res.input]); } pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_BODY]); GLSLH("%s\n}\n\n", retvals[sub->res.output]); // Copy over all of the descriptors etc. 
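    // Duplicating the sub-shader's `tmp` refs (rather than moving them)
    // keeps the identifier strings and variable data they own alive for as
    // long as either shader still references them, so resetting or freeing
    // `sub` afterwards does not invalidate the merged result.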
for (int i = 0; i < sub->tmp.num; i++) PL_ARRAY_APPEND(sh, sh->tmp, pl_ref_dup(sub->tmp.elem[i])); PL_ARRAY_CONCAT(sh, sh->vas, sub->vas); PL_ARRAY_CONCAT(sh, sh->vars, sub->vars); PL_ARRAY_CONCAT(sh, sh->descs, sub->descs); PL_ARRAY_CONCAT(sh, sh->consts, sub->consts); PL_ARRAY_CONCAT(sh, sh->steps, sub->steps); return name; } // Finish the current shader body and return its function name static ident_t sh_split(pl_shader sh) { pl_assert(sh->mutable); // Concatenate the body onto the head as a new function ident_t name = sh_fresh(sh, "main"); if (sh->res.input == PL_SHADER_SIG_SAMPLER) { pl_assert(sh->sampler_prefix); GLSLH("%s %s(%c%s src_tex, vec2 tex_coord) {\n", outsigs[sh->res.output], name, sh->sampler_prefix, samplers2D[sh->sampler_type]); } else { GLSLH("%s %s(%s) {\n", outsigs[sh->res.output], name, insigs[sh->res.input]); } if (sh->buffers[SH_BUF_BODY].len) { pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sh->buffers[SH_BUF_BODY]); sh->buffers[SH_BUF_BODY].len = 0; sh->buffers[SH_BUF_BODY].buf[0] = '\0'; // for sanity / efficiency } if (sh->buffers[SH_BUF_FOOTER].len) { pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], sh->buffers[SH_BUF_FOOTER]); sh->buffers[SH_BUF_FOOTER].len = 0; sh->buffers[SH_BUF_FOOTER].buf[0] = '\0'; } GLSLH("%s\n}\n\n", retvals[sh->res.output]); return name; } const struct pl_shader_res *pl_shader_finalize(pl_shader sh) { if (sh->failed) return NULL; if (!sh->mutable) return &sh->res; // Split the shader. This finalizes the body and adds it to the header sh->res.name = sh_split(sh); // Padding for readability GLSLP("\n"); // Concatenate the header onto the prelude to form the final output pl_str *glsl = &sh->buffers[SH_BUF_PRELUDE]; pl_str_append(sh, glsl, sh->buffers[SH_BUF_HEADER]); // Generate the pretty description sh->res.description = "(unknown shader)"; if (sh->steps.num) { // Reuse this buffer pl_str *desc = &sh->buffers[SH_BUF_BODY]; desc->len = 0; for (int i = 0; i < sh->steps.num; i++) { const char *step = sh->steps.elem[i]; if (!step) continue; // Group together duplicates. We're okay using a weak equality // check here because all pass descriptions are static strings. 
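            // For example, a step description that occurs three times in
            // `steps` is emitted once as "<step> x3" instead of being
            // listed three separate times.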
int count = 1; for (int j = i+1; j < sh->steps.num; j++) { if (sh->steps.elem[j] == step) { sh->steps.elem[j] = NULL; count++; } } if (i > 0) pl_str_append(sh, desc, pl_str0(", ")); pl_str_append(sh, desc, pl_str0(step)); if (count > 1) pl_str_append_asprintf(sh, desc, " x%d", count); } sh->res.description = (char *) desc->buf; } // Set the vas/vars/descs sh->res.vertex_attribs = sh->vas.elem; sh->res.num_vertex_attribs = sh->vas.num; sh->res.variables = sh->vars.elem; sh->res.num_variables = sh->vars.num; sh->res.descriptors = sh->descs.elem; sh->res.num_descriptors = sh->descs.num; sh->res.constants = sh->consts.elem; sh->res.num_constants = sh->consts.num; sh->res.steps = sh->steps.elem; sh->res.num_steps = sh->steps.num; // Update the result pointer and return sh->res.glsl = (char *) glsl->buf; sh->mutable = false; return &sh->res; } bool sh_require(pl_shader sh, enum pl_shader_sig insig, int w, int h) { if (sh->failed) { SH_FAIL(sh, "Attempting to modify a failed shader!"); return false; } if (!sh->mutable) { SH_FAIL(sh, "Attempted to modify an immutable shader!"); return false; } if ((w && sh->output_w && sh->output_w != w) || (h && sh->output_h && sh->output_h != h)) { SH_FAIL(sh, "Illegal sequence of shader operations: Incompatible " "output size requirements %dx%d and %dx%d", sh->output_w, sh->output_h, w, h); return false; } static const char *names[] = { [PL_SHADER_SIG_NONE] = "PL_SHADER_SIG_NONE", [PL_SHADER_SIG_COLOR] = "PL_SHADER_SIG_COLOR", }; // If we require an input, but there is none available - just get it from // the user by turning it into an explicit input signature. if (!sh->res.output && insig) { pl_assert(!sh->res.input); sh->res.input = insig; } else if (sh->res.output != insig) { SH_FAIL(sh, "Illegal sequence of shader operations! Current output " "signature is '%s', but called operation expects '%s'!", names[sh->res.output], names[insig]); return false; } // All of our shaders end up returning a vec4 color sh->res.output = PL_SHADER_SIG_COLOR; sh->output_w = PL_DEF(sh->output_w, w); sh->output_h = PL_DEF(sh->output_h, h); return true; } void pl_shader_obj_destroy(pl_shader_obj *ptr) { pl_shader_obj obj = *ptr; if (!obj) return; if (obj->uninit) obj->uninit(obj->gpu, obj->priv); *ptr = NULL; pl_free(obj); } void *sh_require_obj(pl_shader sh, pl_shader_obj *ptr, enum pl_shader_obj_type type, size_t priv_size, void (*uninit)(pl_gpu gpu, void *priv)) { if (!ptr) return NULL; pl_shader_obj obj = *ptr; if (obj && obj->gpu != SH_GPU(sh)) { SH_FAIL(sh, "Passed pl_shader_obj belongs to different GPU!"); return NULL; } if (obj && obj->type != type) { SH_FAIL(sh, "Passed pl_shader_obj of wrong type! 
Shader objects must " "always be used with the same type of shader."); return NULL; } if (!obj) { obj = pl_zalloc_ptr(NULL, obj); obj->gpu = SH_GPU(sh); obj->type = type; obj->priv = pl_zalloc(obj, priv_size); obj->uninit = uninit; } *ptr = obj; return obj->priv; } ident_t sh_prng(pl_shader sh, bool temporal, ident_t *p_state) { ident_t randfun = sh_fresh(sh, "rand"), state = sh_fresh(sh, "state"); if (sh_glsl(sh).version >= 130) { // Based on pcg3d (http://jcgt.org/published/0009/03/02/) GLSLP("#define prng_t uvec3\n"); GLSLH("vec3 %s(inout uvec3 s) { \n" " s = 1664525u * s + uvec3(1013904223u); \n" " s.x += s.y * s.z; \n" " s.y += s.z * s.x; \n" " s.z += s.x * s.y; \n" " s ^= s >> 16u; \n" " s.x += s.y * s.z; \n" " s.y += s.z * s.x; \n" " s.z += s.x * s.y; \n" " return vec3(s) * 1.0/float(0xFFFFFFFFu); \n" "} \n", randfun); const char *seed = "0u"; if (temporal) { seed = sh_var(sh, (struct pl_shader_var) { .var = pl_var_uint("seed"), .data = &(unsigned int){ SH_PARAMS(sh).index }, .dynamic = true, }); }; GLSL("uvec3 %s = uvec3(gl_FragCoord.xy, %s); \n", state, seed); } else { // Based on SGGP (https://briansharpe.wordpress.com/2011/10/01/gpu-texture-free-noise/) ident_t permute = sh_fresh(sh, "permute"); GLSLP("#define prng_t float\n"); GLSLH("float %s(float x) { \n" " x = (34.0 * x + 1.0) * x; \n" " return fract(x * 1.0/289.0) * 289.0; \n" "} \n" "vec3 %s(inout float s) { \n" " vec3 ret; \n" " ret.x = %s(s); \n" " ret.y = %s(ret.x); \n" " ret.z = %s(ret.y); \n" " s = ret.z; \n" " return fract(ret * 1.0/41.0); \n" "} \n", permute, randfun, permute, permute, permute); static const double phi = 1.618033988749895; const char *seed = "0.0"; if (temporal) { seed = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("seed"), .data = &(float){ modff(phi * SH_PARAMS(sh).index, &(float){0}) }, .dynamic = true, }); }; GLSL("vec3 %s_m = vec3(fract(gl_FragCoord.xy * vec2(%f)), %s); \n" "%s_m += vec3(1.0); \n" "float %s = %s(%s(%s(%s_m.x) + %s_m.y) + %s_m.z); \n", state, phi, seed, state, state, permute, permute, permute, state, state, state); } if (p_state) *p_state = state; ident_t res = sh_fresh(sh, "RAND"); GLSLH("#define %s (%s(%s))\n", res, randfun, state); return res; } // Defines a LUT position helper macro. This translates from an absolute texel // scale (0.0 - 1.0) to the texture coordinate scale for the corresponding // sample in a texture of dimension `lut_size`. 
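// In other words, for a texture with `lut_size` texels this expands to
//
//     LUT_POS(x) = mix(0.5 / lut_size, 1.0 - 0.5 / lut_size, x)
//
// e.g. with lut_size = 64, x = 0.0 maps to 0.0078125 (the center of the
// first texel) and x = 1.0 maps to 0.9921875 (the center of the last one).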
static ident_t sh_lut_pos(pl_shader sh, int lut_size) { ident_t name = sh_fresh(sh, "LUT_POS"); GLSLH("#define %s(x) mix(%s, %s, (x)) \n", name, SH_FLOAT(0.5 / lut_size), SH_FLOAT(1.0 - 0.5 / lut_size)); return name; } struct sh_lut_obj { enum sh_lut_method method; enum pl_var_type type; bool linear; int width, height, depth, comps; uint64_t signature; bool error; // reset if params change // weights, depending on the method pl_tex tex; pl_str str; void *data; }; static void sh_lut_uninit(pl_gpu gpu, void *ptr) { struct sh_lut_obj *lut = ptr; pl_tex_destroy(gpu, &lut->tex); pl_free(lut->str.buf); pl_free(lut->data); *lut = (struct sh_lut_obj) {0}; } // Maximum number of floats to embed as a literal array (when using SH_LUT_AUTO) #define SH_LUT_MAX_LITERAL_SOFT 64 #define SH_LUT_MAX_LITERAL_HARD 256 ident_t sh_lut(pl_shader sh, const struct sh_lut_params *params) { pl_gpu gpu = SH_GPU(sh); void *tmp = NULL; pl_assert(params->width > 0 && params->height >= 0 && params->depth >= 0); pl_assert(params->comps > 0); pl_assert(params->type); pl_assert(!params->linear || params->type == PL_VAR_FLOAT); int sizes[] = { params->width, params->height, params->depth }; int size = params->width * PL_DEF(params->height, 1) * PL_DEF(params->depth, 1); int dims = params->depth ? 3 : params->height ? 2 : 1; int texdim = 0; uint32_t max_tex_dim[] = { gpu ? gpu->limits.max_tex_1d_dim : 0, gpu ? gpu->limits.max_tex_2d_dim : 0, (gpu && gpu->glsl.version > 100) ? gpu->limits.max_tex_3d_dim : 0, }; struct sh_lut_obj *lut = SH_OBJ(sh, params->object, PL_SHADER_OBJ_LUT, struct sh_lut_obj, sh_lut_uninit); if (!lut) return NULL; bool update = params->update || lut->signature != params->signature || params->type != lut->type || params->linear != lut->linear || params->width != lut->width || params->height != lut->height || params->depth != lut->depth || params->comps != lut->comps; if (lut->error && !update) return NULL; // suppress error spam until something changes // Try picking the right number of dimensions for the texture LUT. This // allows e.g. falling back to 2D textures if 1D textures are unsupported. for (int d = dims; d <= PL_ARRAY_SIZE(max_tex_dim); d++) { // For a given dimension to be compatible, all coordinates need to be // within the maximum texture size for that dimension for (int i = 0; i < d; i++) { if (sizes[i] > max_tex_dim[d - 1]) goto next_dim; } // All dimensions are compatible, so pick this texture dimension texdim = d; break; next_dim: ; // `continue` out of the inner loop } static const enum pl_fmt_type fmt_type[PL_VAR_TYPE_COUNT] = { [PL_VAR_SINT] = PL_FMT_SINT, [PL_VAR_UINT] = PL_FMT_UINT, [PL_VAR_FLOAT] = PL_FMT_FLOAT, }; enum pl_fmt_caps texcaps = PL_FMT_CAP_SAMPLEABLE; if (params->linear) texcaps |= PL_FMT_CAP_LINEAR; pl_fmt texfmt = NULL; if (texdim) { texfmt = pl_find_fmt(gpu, fmt_type[params->type], params->comps, params->type == PL_VAR_FLOAT ? 
16 : 32, pl_var_type_size(params->type) * 8, texcaps); } enum sh_lut_method method = params->method; // The linear sampling code currently only supports 1D linear interpolation if (params->linear && dims > 1) { if (texfmt) { method = SH_LUT_TEXTURE; } else { PL_ERR(sh, "Can't emulate linear LUTs for 2D/3D LUTs and no " "texture support available!"); goto error; } } bool can_uniform = gpu && gpu->limits.max_variable_comps >= size * params->comps; bool can_literal = sh_glsl(sh).version > 110; // needed for literal arrays can_literal &= size <= SH_LUT_MAX_LITERAL_HARD && !params->dynamic; // Deselect unsupported methods if (method == SH_LUT_UNIFORM && !can_uniform) method = SH_LUT_AUTO; if (method == SH_LUT_LITERAL && !can_literal) method = SH_LUT_AUTO; if (method == SH_LUT_TEXTURE && !texfmt) method = SH_LUT_AUTO; // Sorted by priority if (!method && can_literal && size <= SH_LUT_MAX_LITERAL_SOFT) method = SH_LUT_LITERAL; if (!method && texfmt) method = SH_LUT_TEXTURE; if (!method && can_uniform) method = SH_LUT_UNIFORM; if (!method && can_literal) method = SH_LUT_LITERAL; if (!method) { PL_ERR(sh, "Can't generate LUT: no compatible methods!"); goto error; } // Reinitialize the existing LUT if needed update |= method != lut->method; if (update) { PL_DEBUG(sh, "LUT cache invalidated, regenerating.."); size_t buf_size = size * params->comps * pl_var_type_size(params->type); tmp = pl_zalloc(NULL, buf_size); params->fill(tmp, params); switch (method) { case SH_LUT_TEXTURE: { if (!texdim) { PL_ERR(sh, "Texture LUT exceeds texture dimensions!"); goto error; } if (!texfmt) { PL_ERR(sh, "Found no compatible texture format for LUT!"); goto error; } struct pl_tex_params tex_params = { .w = params->width, .h = PL_DEF(params->height, texdim >= 2 ? 1 : 0), .d = PL_DEF(params->depth, texdim >= 3 ? 1 : 0), .format = texfmt, .sampleable = true, .host_writable = params->dynamic, .initial_data = params->dynamic ? NULL : tmp, .debug_tag = PL_DEBUG_TAG, }; bool ok; if (params->dynamic) { ok = pl_tex_recreate(gpu, &lut->tex, &tex_params); if (ok) { ok = pl_tex_upload(gpu, pl_tex_transfer_params( .tex = lut->tex, .ptr = tmp, )); } } else { // Can't use pl_tex_recreate because of `initial_data` pl_tex_destroy(gpu, &lut->tex); lut->tex = pl_tex_create(gpu, &tex_params); ok = lut->tex; } if (!ok) { PL_ERR(sh, "Failed creating LUT texture!"); goto error; } break; } case SH_LUT_UNIFORM: pl_free(lut->data); lut->data = tmp; // re-use `tmp` tmp = NULL; break; case SH_LUT_LITERAL: { lut->str.len = 0; static const char prefix[PL_VAR_TYPE_COUNT] = { [PL_VAR_SINT] = 'i', [PL_VAR_UINT] = 'u', [PL_VAR_FLOAT] = ' ', }; for (int i = 0; i < size * params->comps; i += params->comps) { if (i > 0) pl_str_append_asprintf_c(lut, &lut->str, ","); if (params->comps > 1) { pl_str_append_asprintf_c(lut, &lut->str, "%cvec%d(", prefix[params->type], params->comps); } for (int c = 0; c < params->comps; c++) { switch (params->type) { case PL_VAR_FLOAT: pl_str_append_asprintf_c(lut, &lut->str, "%s%f", c > 0 ? "," : "", ((float *) tmp)[i+c]); break; case PL_VAR_UINT: pl_str_append_asprintf_c(lut, &lut->str, "%s%u", c > 0 ? "," : "", ((unsigned int *) tmp)[i+c]); break; case PL_VAR_SINT: pl_str_append_asprintf_c(lut, &lut->str, "%s%d", c > 0 ? 
"," : "", ((int *) tmp)[i+c]); break; case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: pl_unreachable(); } } if (params->comps > 1) pl_str_append_asprintf_c(lut, &lut->str, ")"); } break; } case SH_LUT_AUTO: pl_unreachable(); } lut->method = method; lut->type = params->type; lut->linear = params->linear; lut->width = params->width; lut->height = params->height; lut->depth = params->depth; lut->comps = params->comps; } // Done updating, generate the GLSL ident_t name = sh_fresh(sh, "lut"); ident_t arr_name = NULL; static const char * const swizzles[] = {"x", "xy", "xyz", "xyzw"}; static const char * const vartypes[PL_VAR_TYPE_COUNT][4] = { [PL_VAR_SINT] = { "int", "ivec2", "ivec3", "ivec4" }, [PL_VAR_UINT] = { "uint", "uvec2", "uvec3", "uvec4" }, [PL_VAR_FLOAT] = { "float", "vec2", "vec3", "vec4" }, }; switch (method) { case SH_LUT_TEXTURE: { assert(texdim); ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "weights", .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = lut->tex, .sample_mode = params->linear ? PL_TEX_SAMPLE_LINEAR : PL_TEX_SAMPLE_NEAREST, } }); // texelFetch requires GLSL >= 130, so fall back to the linear code if (params->linear || gpu->glsl.version < 130) { ident_t pos_macros[PL_ARRAY_SIZE(sizes)] = {0}; for (int i = 0; i < dims; i++) pos_macros[i] = sh_lut_pos(sh, sizes[i]); GLSLH("#define %s(pos) (%s(%s, %s(\\\n", name, sh_tex_fn(sh, lut->tex->params), tex, vartypes[PL_VAR_FLOAT][texdim - 1]); for (int i = 0; i < texdim; i++) { char sep = i == 0 ? ' ' : ','; if (pos_macros[i]) { if (dims > 1) { GLSLH(" %c%s(%s(pos).%c)\\\n", sep, pos_macros[i], vartypes[PL_VAR_FLOAT][dims - 1], "xyzw"[i]); } else { GLSLH(" %c%s(float(pos))\\\n", sep, pos_macros[i]); } } else { GLSLH(" %c%f\\\n", sep, 0.5); } } GLSLH(" )).%s)\n", swizzles[params->comps - 1]); } else { GLSLH("#define %s(pos) (texelFetch(%s, %s(pos", name, tex, vartypes[PL_VAR_SINT][texdim - 1]); // Fill up extra components of the index for (int i = dims; i < texdim; i++) GLSLH(", 0"); GLSLH("), 0).%s)\n", swizzles[params->comps - 1]); } break; } case SH_LUT_UNIFORM: arr_name = sh_var(sh, (struct pl_shader_var) { .var = { .name = "weights", .type = params->type, .dim_v = params->comps, .dim_m = 1, .dim_a = size, }, .data = lut->data, }); break; case SH_LUT_LITERAL: arr_name = sh_fresh(sh, "weights"); GLSLH("const %s %s[%d] = %s[](\n ", vartypes[params->type][params->comps - 1], arr_name, size, vartypes[params->type][params->comps - 1]); pl_str_append(sh, &sh->buffers[SH_BUF_HEADER], lut->str); GLSLH(");\n"); break; case SH_LUT_AUTO: pl_unreachable(); } if (arr_name) { GLSLH("#define %s(pos) (%s[int((pos)%s)\\\n", name, arr_name, dims > 1 ? 
"[0]" : ""); int shift = params->width; for (int i = 1; i < dims; i++) { GLSLH(" + %d * int((pos)[%d])\\\n", shift, i); shift *= sizes[i]; } GLSLH(" ])\n"); if (params->linear) { pl_assert(dims == 1); pl_assert(params->type == PL_VAR_FLOAT); ident_t arr_lut = name; name = sh_fresh(sh, "lut_lin"); GLSLH("%s %s(float fpos) { \n" " fpos = clamp(fpos, 0.0, 1.0) * %d.0; \n" " float fbase = floor(fpos); \n" " float fceil = ceil(fpos); \n" " float fcoord = fpos - fbase; \n" " return mix(%s(fbase), %s(fceil), fcoord); \n" "} \n", vartypes[PL_VAR_FLOAT][params->comps - 1], name, size - 1, arr_lut, arr_lut); } } lut->error = false; pl_free(tmp); pl_assert(name); return name; error: lut->error = true; pl_free(tmp); return NULL; } const char *sh_bvec(const pl_shader sh, int dims) { static const char *bvecs[] = { [1] = "bool", [2] = "bvec2", [3] = "bvec3", [4] = "bvec4", }; static const char *vecs[] = { [1] = "float", [2] = "vec2", [3] = "vec3", [4] = "vec4", }; pl_assert(dims > 0 && dims < PL_ARRAY_SIZE(bvecs)); return sh_glsl(sh).version >= 130 ? bvecs[dims] : vecs[dims]; } libplacebo-v4.192.1/src/shaders.h000066400000000000000000000243061417677245700165740ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include "common.h" #include "log.h" #include "gpu.h" // This represents an identifier (e.g. name of function, uniform etc.) for // a shader resource. The generated identifiers are immutable, but only live // until pl_shader_reset - so make copies when passing to external stuff. typedef const char * ident_t; enum pl_shader_buf { SH_BUF_PRELUDE, // extra #defines etc. SH_BUF_HEADER, // previous passes, helper function definitions, etc. SH_BUF_BODY, // partial contents of the "current" function SH_BUF_FOOTER, // will be appended to the end of the current function SH_BUF_COUNT, }; enum pl_shader_type { SH_AUTO, SH_COMPUTE, SH_FRAGMENT }; struct pl_shader { pl_log log; struct pl_shader_res res; // for accumulating some of the fields PL_ARRAY(struct pl_ref *) tmp; // only used for var/va/desc names and data bool failed; bool mutable; int output_w; int output_h; bool transpose; pl_str buffers[SH_BUF_COUNT]; enum pl_shader_type type; bool flexible_work_groups; enum pl_sampler_type sampler_type; char sampler_prefix; int fresh; // mutable versions of the fields from pl_shader_res PL_ARRAY(struct pl_shader_va) vas; PL_ARRAY(struct pl_shader_var) vars; PL_ARRAY(struct pl_shader_desc) descs; PL_ARRAY(struct pl_shader_const) consts; PL_ARRAY(const char *) steps; }; // Helper functions for convenience #define SH_PARAMS(sh) ((sh)->res.params) #define SH_GPU(sh) (SH_PARAMS(sh).gpu) #define SH_TMP(sh) ((sh)->tmp.elem[0]) // Returns the GLSL version, defaulting to desktop 130. struct pl_glsl_version sh_glsl(const pl_shader sh); #define SH_FAIL(sh, ...) 
do { \ sh->failed = true; \ PL_ERR(sh, __VA_ARGS__); \ } while (0) // Attempt enabling compute shaders for this pass, if possible bool sh_try_compute(pl_shader sh, int bw, int bh, bool flex, size_t mem); // Attempt merging a secondary shader into the current shader. Returns NULL if // merging fails (e.g. incompatible signatures); otherwise returns an identifier // corresponding to the generated subpass function. ident_t sh_subpass(pl_shader sh, const pl_shader sub); // Helpers for adding new variables/descriptors/etc. with fresh, unique // identifier names. These will never conflict with other identifiers, even // if the shaders are merged together. ident_t sh_fresh(pl_shader sh, const char *name); // Add a new shader var and return its identifier ident_t sh_var(pl_shader sh, struct pl_shader_var sv); // Add a new shader desc and return its identifier. ident_t sh_desc(pl_shader sh, struct pl_shader_desc sd); // Add a new shader constant and return its identifier. ident_t sh_const(pl_shader sh, struct pl_shader_const sc); // Helper functions for `sh_const` ident_t sh_const_int(pl_shader sh, const char *name, int val); ident_t sh_const_uint(pl_shader sh, const char *name, unsigned int val); ident_t sh_const_float(pl_shader sh, const char *name, float val); #define SH_INT(val) sh_const_int(sh, "const", val) #define SH_UINT(val) sh_const_uint(sh, "const", val) #define SH_FLOAT(val) sh_const_float(sh, "const", val) // Add a new vec2 vertex attribute from a pl_rect2df, or returns NULL on failure. ident_t sh_attr_vec2(pl_shader sh, const char *name, const struct pl_rect2df *rc); // Bind a texture under a given transformation and make its attributes // available as well. If an output pointer for one of the attributes is left // as NULL, that attribute will not be added. Returns NULL on failure. `rect` // is optional, and defaults to the full texture if left as NULL. // // Note that for e.g. compute shaders, the vec2 out_pos might be a macro that // expands to an expensive computation, and should be cached by the user. ident_t sh_bind(pl_shader sh, pl_tex tex, enum pl_tex_address_mode address_mode, enum pl_tex_sample_mode sample_mode, const char *name, const struct pl_rect2df *rect, ident_t *out_pos, ident_t *out_size, ident_t *out_pt); // Incrementally build up a buffer by adding new variable elements to the // buffer, resizing buf.buffer_vars if necessary. Returns whether or not the // variable could be successfully added (which may fail if you try exceeding // the size limits of the buffer type). If successful, the layout is stored // in *out_layout (may be NULL). bool sh_buf_desc_append(void *alloc, pl_gpu gpu, struct pl_shader_desc *buf_desc, struct pl_var_layout *out_layout, const struct pl_var new_var); size_t sh_buf_desc_size(const struct pl_shader_desc *buf_desc); // Underlying function for appending text to a shader void sh_append(pl_shader sh, enum pl_shader_buf buf, const char *fmt, ...) PL_PRINTF(3, 4); void sh_append_str(pl_shader sh, enum pl_shader_buf buf, pl_str str); #define GLSLP(...) sh_append(sh, SH_BUF_PRELUDE, __VA_ARGS__) #define GLSLH(...) sh_append(sh, SH_BUF_HEADER, __VA_ARGS__) #define GLSL(...) sh_append(sh, SH_BUF_BODY, __VA_ARGS__) #define GLSLF(...) 
sh_append(sh, SH_BUF_FOOTER, __VA_ARGS__) // Attach a description to a shader static inline void sh_describe(pl_shader sh, const char *desc) { PL_ARRAY_APPEND(sh, sh->steps, desc); }; // Requires that the share is mutable, has an output signature compatible // with the given input signature, as well as an output size compatible with // the given size requirements. Errors and returns false otherwise. bool sh_require(pl_shader sh, enum pl_shader_sig insig, int w, int h); // Shader resources enum pl_shader_obj_type { PL_SHADER_OBJ_INVALID = 0, PL_SHADER_OBJ_TONE_MAP, PL_SHADER_OBJ_SAMPLER, PL_SHADER_OBJ_DITHER, PL_SHADER_OBJ_ICC, PL_SHADER_OBJ_LUT, PL_SHADER_OBJ_AV1_GRAIN, PL_SHADER_OBJ_FILM_GRAIN, PL_SHADER_OBJ_RESHAPE, }; struct pl_shader_obj { enum pl_shader_obj_type type; pl_gpu gpu; void (*uninit)(pl_gpu gpu, void *priv); void *priv; }; // Returns (*ptr)->priv, or NULL on failure void *sh_require_obj(pl_shader sh, pl_shader_obj *ptr, enum pl_shader_obj_type type, size_t priv_size, void (*uninit)(pl_gpu gpu, void *priv)); #define SH_OBJ(sh, ptr, type, t, uninit) \ ((t*) sh_require_obj(sh, ptr, type, sizeof(t), uninit)) // Initializes a PRNG. The resulting string will directly evaluate to a // pseudorandom, uniformly distributed vec3 from [0.0,1.0]. Since this // algorithm works by mutating a state variable, if the user wants to use the // resulting PRNG inside a subfunction, they must add an extra `inout prng_t %s` // with the contents of `state` to the signature. (Optional) // // If `temporal` is set, the PRNG will vary across frames. ident_t sh_prng(pl_shader sh, bool temporal, ident_t *state); enum sh_lut_method { SH_LUT_AUTO = 0, // pick whatever makes the most sense SH_LUT_TEXTURE, // upload as texture SH_LUT_UNIFORM, // uniform array SH_LUT_LITERAL, // constant / literal array in shader source (fallback) }; struct sh_lut_params { pl_shader_obj *object; // Method and type of the LUT we intend to generate. enum sh_lut_method method; enum pl_var_type type; // LUT dimensions. Unused dimensions may be left as 0. int width; int height; int depth; int comps; // If true, LUT takes a vecN coordinate and linearly interpolates values, // rather than taking an ivecN. Requires `type == PL_VAR_FLOAT`! bool linear; // If true, the LUT will always be regenerated, even if the dimensions have // not changed. bool update; // Alternate way of triggering shader invalidations. If the signature // does not match the LUT's signature, it will be regenerated. uint64_t signature; // If set to true, shader objects will be preserved and updated in-place // rather than being treated as read-only. bool dynamic; // Will be called with a zero-initialized buffer whenever the data needs to // be computed, which happens whenever the size is changed, the shader // object is invalidated, or `update` is set to true. // // Note: Interpretation of `data` is according to `pl_var_type`. void (*fill)(void *data, const struct sh_lut_params *params); void *priv; }; #define sh_lut_params(...) (&(struct sh_lut_params) { __VA_ARGS__ }) // Makes a table of values available as a shader variable, using an a given // method (falling back if needed). The resulting identifier can be sampled // directly as %s(pos), where pos is a vector with the right number of // dimensions. `pos` must be an integer vector within the bounds of the array, // unless the method is `SH_LUT_LINEAR`, in which case it's a float vector that // gets interpolated and clamped as needed. Returns NULL on error. 
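// Purely as an illustration (not part of the API itself), a minimal 1D gamma
// ramp could be generated roughly as follows; `fill_gamma` and `lut_state`
// are hypothetical names, with `lut_state` being a persistent `pl_shader_obj`:
//
//     static void fill_gamma(void *data, const struct sh_lut_params *params)
//     {
//         float *out = data;
//         for (int i = 0; i < params->width; i++)
//             out[i] = powf(i / (params->width - 1.0f), 2.2f);
//     }
//
//     ident_t lut = sh_lut(sh, sh_lut_params(
//         .object = &lut_state,
//         .type   = PL_VAR_FLOAT,
//         .width  = 256,
//         .comps  = 1,
//         .linear = true,
//         .fill   = fill_gamma,
//     ));
//
//     if (lut)
//         GLSL("color.rgb = vec3(%s(color.r), %s(color.g), %s(color.b)); \n",
//              lut, lut, lut);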
ident_t sh_lut(pl_shader sh, const struct sh_lut_params *params); // Returns a GLSL-version appropriate "bvec"-like type. For GLSL 130+, this // returns bvecN. For GLSL 120, this returns vecN instead. The intended use of // this function is with mix(), which only accepts bvec in GLSL 130+. const char *sh_bvec(const pl_shader sh, int dims); // Returns the appropriate `texture`-equivalent function for the shader and // given texture. static inline const char *sh_tex_fn(const pl_shader sh, const struct pl_tex_params params) { static const char *suffixed[] = { [1] = "texture1D", [2] = "texture2D", [3] = "texture3D", }; int dims = pl_tex_params_dimension(params); return sh_glsl(sh).version >= 130 ? "texture" : suffixed[dims]; } libplacebo-v4.192.1/src/shaders/000077500000000000000000000000001417677245700164165ustar00rootroot00000000000000libplacebo-v4.192.1/src/shaders/colorspace.c000066400000000000000000002141541417677245700207230ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "shaders.h" // Common constants for SMPTE ST.2084 (PQ) static const float PQ_M1 = 2610./4096 * 1./4, PQ_M2 = 2523./4096 * 128, PQ_C1 = 3424./4096, PQ_C2 = 2413./4096 * 32, PQ_C3 = 2392./4096 * 32; // Common constants for ARIB STD-B67 (HLG) static const float HLG_A = 0.17883277, HLG_B = 0.28466892, HLG_C = 0.55991073, HLG_REF = 1000.0 / PL_COLOR_SDR_WHITE; // Common constants for Panasonic V-Log static const float VLOG_B = 0.00873, VLOG_C = 0.241514, VLOG_D = 0.598206; // Common constants for Sony S-Log static const float SLOG_A = 0.432699, SLOG_B = 0.037584, SLOG_C = 0.616596 + 0.03, SLOG_P = 3.538813, SLOG_Q = 0.030001, SLOG_K2 = 155.0 / 219.0; void pl_shader_set_alpha(pl_shader sh, struct pl_color_repr *repr, enum pl_alpha_mode mode) { if (repr->alpha == PL_ALPHA_PREMULTIPLIED && mode == PL_ALPHA_INDEPENDENT) { GLSL("if (color.a > 1e-6) \n" " color.rgb /= vec3(color.a); \n"); repr->alpha = PL_ALPHA_INDEPENDENT; } if (repr->alpha == PL_ALPHA_INDEPENDENT && mode == PL_ALPHA_PREMULTIPLIED) { GLSL("color.rgb *= vec3(color.a); \n"); repr->alpha = PL_ALPHA_PREMULTIPLIED; } } static inline void reshape_mmr(pl_shader sh, ident_t mmr, bool single, int min_order, int max_order) { if (sh_glsl(sh).version < 130) { SH_FAIL(sh, "MMR reshaping requires GLSL 130+"); return; } if (single) { GLSL("const uint mmr_idx = 0u; \n"); } else { GLSL("uint mmr_idx = uint(coeffs.y); \n"); } assert(min_order <= max_order); if (min_order < max_order) GLSL("uint order = uint(coeffs.w); \n"); GLSL("vec4 sigX; \n" "s = coeffs.x; \n" "sigX.xyz = sig.xxy * sig.yzz; \n" "sigX.w = sigX.x * sig.z; \n" "s += dot(%s[mmr_idx + 0].xyz, sig); \n" "s += dot(%s[mmr_idx + 1], sigX); \n", mmr, mmr); if (max_order >= 2) { if (min_order < 2) GLSL("if (order >= 2) { \n"); GLSL("vec3 sig2 = sig * sig; \n" "vec4 sigX2 = sigX * sigX; \n" "s += dot(%s[mmr_idx + 2].xyz, sig2); \n" "s += dot(%s[mmr_idx + 3], sigX2); \n", mmr, 
mmr); if (max_order == 3) { if (min_order < 3) GLSL("if (order >= 3 { \n"); GLSL("s += dot(%s[mmr_idx + 4].xyz, sig2 * sig); \n" "s += dot(%s[mmr_idx + 5], sigX2 * sigX); \n", mmr, mmr); if (min_order < 3) GLSL("} \n"); } if (min_order < 2) GLSL("} \n"); } } static inline void reshape_poly(pl_shader sh) { GLSL("s = (coeffs.z * s + coeffs.y) * s + coeffs.x; \n"); } void pl_shader_dovi_reshape(pl_shader sh, const struct pl_dovi_metadata *data) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0) || !data) return; sh_describe(sh, "reshaping"); GLSL("// pl_shader_reshape \n" "{ \n" "vec3 sig; \n" "vec4 coeffs; \n" "float s; \n" "sig = clamp(color.rgb, 0.0, 1.0); \n"); float coeffs_data[8][4]; float mmr_packed_data[8*6][4]; for (int c = 0; c < 3; c++) { const struct pl_reshape_data *comp = &data->comp[c]; if (!comp->num_pivots) continue; pl_assert(comp->num_pivots >= 2 && comp->num_pivots <= 9); GLSL("s = sig[%d]; \n", c); // Prepare coefficients for GPU bool has_poly = false, has_mmr = false, mmr_single = true; int mmr_idx = 0, min_order = 3, max_order = 1; memset(coeffs_data, 0, sizeof(coeffs_data)); for (int i = 0; i < comp->num_pivots - 1; i++) { switch (comp->method[i]) { case 0: // polynomial has_poly = true; coeffs_data[i][3] = 0.0; // order=0 signals polynomial for (int k = 0; k < 3; k++) coeffs_data[i][k] = comp->poly_coeffs[i][k]; break; case 1: min_order = PL_MIN(min_order, comp->mmr_order[i]); max_order = PL_MAX(max_order, comp->mmr_order[i]); mmr_single = !has_mmr; has_mmr = true; coeffs_data[i][3] = (float) comp->mmr_order[i]; coeffs_data[i][0] = comp->mmr_constant[i]; coeffs_data[i][1] = (float) mmr_idx; for (int j = 0; j < comp->mmr_order[i]; j++) { // store weights per order as two packed vec4s float *mmr = &mmr_packed_data[mmr_idx][0]; mmr[0] = comp->mmr_coeffs[i][j][0]; mmr[1] = comp->mmr_coeffs[i][j][1]; mmr[2] = comp->mmr_coeffs[i][j][2]; mmr[3] = 0.0; // unused mmr[4] = comp->mmr_coeffs[i][j][3]; mmr[5] = comp->mmr_coeffs[i][j][4]; mmr[6] = comp->mmr_coeffs[i][j][5]; mmr[7] = comp->mmr_coeffs[i][j][6]; mmr_idx += 2; } break; default: pl_unreachable(); } } if (comp->num_pivots > 2) { // Skip the (irrelevant) lower and upper bounds float pivots_data[7]; memcpy(pivots_data, comp->pivots + 1, (comp->num_pivots - 2) * sizeof(pivots_data[0])); // Fill the remainder with a quasi-infinite sentinel pivot for (int i = comp->num_pivots - 2; i < PL_ARRAY_SIZE(pivots_data); i++) pivots_data[i] = 1e9f; ident_t pivots = sh_var(sh, (struct pl_shader_var) { .data = pivots_data, .var = { .name = "pivots", .type = PL_VAR_FLOAT, .dim_v = 1, .dim_m = 1, .dim_a = PL_ARRAY_SIZE(pivots_data), }, }); ident_t coeffs = sh_var(sh, (struct pl_shader_var) { .data = coeffs_data, .var = { .name = "coeffs", .type = PL_VAR_FLOAT, .dim_v = 4, .dim_m = 1, .dim_a = PL_ARRAY_SIZE(coeffs_data), }, }); // Efficiently branch into the correct set of coefficients GLSL("#define test(i) bvec4(s >= %s[i]) \n" "#define coef(i) %s[i] \n" "coeffs = mix(mix(mix(coef(0), coef(1), test(0)), \n" " mix(coef(2), coef(3), test(2)), \n" " test(1)), \n" " mix(mix(coef(4), coef(5), test(4)), \n" " mix(coef(6), coef(7), test(6)), \n" " test(5)), \n" " test(3)); \n" "#undef test \n" "#undef coef \n", pivots, coeffs); } else { // No need for a single pivot, just set the coeffs directly GLSL("coeffs = %s; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec4("coeffs"), .data = coeffs_data, })); } ident_t mmr = NULL; if (has_mmr) { mmr = sh_var(sh, (struct pl_shader_var) { .data = mmr_packed_data, .var = { .name = "mmr", .type = 
PL_VAR_FLOAT, .dim_v = 4, .dim_m = 1, .dim_a = mmr_idx, }, }); } if (has_mmr && has_poly) { GLSL("if (coeffs.w == 0.0) { \n"); reshape_poly(sh); GLSL("} else { \n"); reshape_mmr(sh, mmr, mmr_single, min_order, max_order); GLSL("} \n"); } else if (has_poly) { reshape_poly(sh); } else { assert(has_mmr); GLSL("{ \n"); reshape_mmr(sh, mmr, mmr_single, min_order, max_order); GLSL("} \n"); } // Hard-code these as constants because they're exceptionally unlikely // to change from frame to frame (if they do, shoot the sample author) ident_t lo = SH_FLOAT(comp->pivots[0]); ident_t hi = SH_FLOAT(comp->pivots[comp->num_pivots - 1]); GLSL("color[%d] = clamp(s, %s, %s); \n", c, lo, hi); } GLSL("} \n"); } void pl_shader_decode_color(pl_shader sh, struct pl_color_repr *repr, const struct pl_color_adjustment *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; sh_describe(sh, "color decoding"); GLSL("// pl_shader_decode_color \n" "{ \n"); // Do this first because the following operations are potentially nonlinear pl_shader_set_alpha(sh, repr, PL_ALPHA_INDEPENDENT); // XYZ needs special handling due to the input gamma logic if (repr->sys == PL_COLOR_SYSTEM_XYZ) { ident_t scale = SH_FLOAT(pl_color_repr_normalize(repr)); GLSL("color.rgb = max(color.rgb, vec3(0.0)); \n" "color.rgb = pow(vec3(%s) * color.rgb, vec3(2.6)); \n", scale); } if (repr->sys == PL_COLOR_SYSTEM_DOLBYVISION) { ident_t scale = SH_FLOAT(pl_color_repr_normalize(repr)); GLSL("color.rgb *= vec3(%s); \n", scale); pl_shader_dovi_reshape(sh, repr->dovi); } enum pl_color_system orig_sys = repr->sys; struct pl_transform3x3 tr = pl_color_repr_decode(repr, params); if (memcmp(&tr, &pl_transform3x3_identity, sizeof(tr))) { ident_t cmat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("cmat"), .data = PL_TRANSPOSE_3X3(tr.mat.m), }); ident_t cmat_c = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec3("cmat_c"), .data = tr.c, }); GLSL("color.rgb = %s * color.rgb + %s;\n", cmat, cmat_c); } switch (orig_sys) { case PL_COLOR_SYSTEM_BT_2020_C: // Conversion for C'rcY'cC'bc via the BT.2020 CL system: // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 // = (B'-Y'c) / 1.5816 | C'bc > 0 // // C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0 // = (R'-Y'c) / 0.9936 | C'rc > 0 // // as per the BT.2020 specification, table 4. This is a non-linear // transformation because (constant) luminance receives non-equal // contributions from the three different channels. GLSL("// constant luminance conversion \n" "color.br = color.br * mix(vec2(1.5816, 0.9936), \n" " vec2(1.9404, 1.7184), \n" " %s(lessThanEqual(color.br, vec2(0.0)))) \n" " + color.gg; \n", sh_bvec(sh, 2)); // Expand channels to camera-linear light. This shader currently just // assumes everything uses the BT.2020 12-bit gamma function, since the // difference between 10 and 12-bit is negligible for anything other // than 12-bit content. GLSL("vec3 lin = mix(color.rgb * vec3(1.0/4.5), \n" " pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n" " vec3(1.0/0.45)), \n" " %s(lessThanEqual(vec3(0.08145), color.rgb))); \n", sh_bvec(sh, 3)); // Calculate the green channel from the expanded RYcB, and recompress to G' // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B GLSL("color.g = (lin.g - 0.2627*lin.r - 0.0593*lin.b)*1.0/0.6780; \n" "color.g = mix(color.g * 4.5, \n" " 1.0993 * pow(color.g, 0.45) - 0.0993, \n" " %s(0.0181 <= color.g)); \n", sh_bvec(sh, 1)); break; case PL_COLOR_SYSTEM_BT_2100_PQ:; // Conversion process from the spec: // // 1. L'M'S' = cmat * ICtCp // 2. 
LMS = linearize(L'M'S') (EOTF for PQ, inverse OETF for HLG) // 3. RGB = lms2rgb * LMS // // After this we need to invert step 2 to arrive at non-linear RGB. // (It's important we keep the transfer function conversion separate // from the color system decoding, so we have to partially undo our // work here even though we will end up linearizing later on anyway) // Inverted from the matrix in the spec, transposed to column major static const char *bt2100_lms2rgb = "mat3(" " 3.43661, -0.79133, -0.0259499, " " -2.50645, 1.9836, -0.0989137, " "0.0698454, -0.192271, 1.12486) "; // PQ EOTF GLSL("color.rgb = pow(max(color.rgb, 0.0), vec3(1.0/%f)); \n" "color.rgb = max(color.rgb - vec3(%f), 0.0) \n" " / (vec3(%f) - vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n", PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1); // LMS matrix GLSL("color.rgb = %s * color.rgb; \n", bt2100_lms2rgb); // PQ OETF GLSL("color.rgb = pow(max(color.rgb, 0.0), vec3(%f)); \n" "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n", PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); break; case PL_COLOR_SYSTEM_BT_2100_HLG: // HLG OETF^-1 GLSL("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb, \n" " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " + vec3(%f), \n" " %s(lessThan(vec3(0.5), color.rgb))); \n", HLG_C, HLG_A, HLG_B, sh_bvec(sh, 3)); // LMS matrix GLSL("color.rgb = %s * color.rgb; \n", bt2100_lms2rgb); // HLG OETF GLSL("color.rgb = mix(vec3(0.5) * sqrt(color.rgb), \n" " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f), \n" " %s(lessThan(vec3(1.0), color.rgb))); \n", HLG_A, HLG_B, HLG_C, sh_bvec(sh, 3)); break; case PL_COLOR_SYSTEM_DOLBYVISION:; // Dolby Vision always outputs BT.2020-referred HPE LMS, so hard-code // the inverse LMS->RGB matrix corresponding to this color space. struct pl_matrix3x3 dovi_lms2rgb = {{ { 3.06441879, -2.16597676, 0.10155818}, {-0.65612108, 1.78554118, -0.12943749}, { 0.01736321, -0.04725154, 1.03004253}, }}; pl_matrix3x3_mul(&dovi_lms2rgb, &repr->dovi->linear); ident_t mat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("lms2rgb"), .data = PL_TRANSPOSE_3X3(dovi_lms2rgb.m), }); // PQ EOTF GLSL("color.rgb = pow(max(color.rgb, 0.0), vec3(1.0/%f)); \n" "color.rgb = max(color.rgb - vec3(%f), 0.0) \n" " / (vec3(%f) - vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n", PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1); // LMS matrix GLSL("color.rgb = %s * color.rgb; \n", mat); // PQ OETF GLSL("color.rgb = pow(max(color.rgb, 0.0), vec3(%f)); \n" "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n", PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); break; case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_XYZ: case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_YCGCO: break; // no special post-processing needed case PL_COLOR_SYSTEM_COUNT: pl_unreachable(); } // Gamma adjustment. Doing this here (in non-linear light) is technically // somewhat wrong, but this is just an aesthetic parameter and not really // meant for colorimetric precision, so we don't care too much. 
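// Concretely, the branch below evaluates color.rgb = max(color.rgb, 0)^(1/gamma),
// so gamma > 1 lifts the mid-tones and gamma < 1 darkens them; gamma == 0 is
// special-cased to plain black, since 1/gamma would otherwise divide by zero.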
if (params && params->gamma == 0) { // Avoid division by zero GLSL("color.rgb = vec3(0.0); \n"); } else if (params && params->gamma != 1) { ident_t gamma = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("gamma"), .data = &(float){ 1 / params->gamma }, }); GLSL("color.rgb = pow(max(color.rgb, vec3(0.0)), vec3(%s)); \n", gamma); } GLSL("}\n"); } void pl_shader_encode_color(pl_shader sh, const struct pl_color_repr *repr) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; sh_describe(sh, "color encoding"); GLSL("// pl_shader_encode_color \n" "{ \n"); switch (repr->sys) { case PL_COLOR_SYSTEM_BT_2020_C: // Expand R'G'B' to RGB GLSL("vec3 lin = mix(color.rgb * vec3(1.0/4.5), \n" " pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n" " vec3(1.0/0.45)), \n" " %s(lessThanEqual(vec3(0.08145), color.rgb))); \n", sh_bvec(sh, 3)); // Compute Yc from RGB and compress to R'Y'cB' GLSL("color.g = dot(vec3(0.2627, 0.6780, 0.0593), lin); \n" "color.g = mix(color.g * 4.5, \n" " 1.0993 * pow(color.g, 0.45) - 0.0993, \n" " %s(0.0181 <= color.g)); \n", sh_bvec(sh, 1)); // Compute C'bc and C'rc into color.br GLSL("color.br = color.br - color.gg; \n" "color.br *= mix(vec2(1.0/1.5816, 1.0/0.9936), \n" " vec2(1.0/1.9404, 1.0/1.7184), \n" " %s(lessThanEqual(color.br, vec2(0.0)))); \n", sh_bvec(sh, 2)); break; case PL_COLOR_SYSTEM_BT_2100_PQ:; // Inverse of the matrix above static const char *bt2100_rgb2lms = "mat3(" "0.412109, 0.166748, 0.024170, " "0.523925, 0.720459, 0.075440, " "0.063965, 0.112793, 0.900394) "; GLSL("color.rgb = pow(max(color.rgb, 0.0), vec3(1.0/%f)); \n" "color.rgb = max(color.rgb - vec3(%f), 0.0) \n" " / (vec3(%f) - vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n", PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1); GLSL("color.rgb = %s * color.rgb; \n", bt2100_rgb2lms); GLSL("color.rgb = pow(color.rgb, vec3(%f)); \n" "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n", PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); break; case PL_COLOR_SYSTEM_BT_2100_HLG: GLSL("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb, \n" " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " + vec3(%f), \n" " %s(lessThan(vec3(0.5), color.rgb))); \n", HLG_C, HLG_A, HLG_B, sh_bvec(sh, 3)); GLSL("color.rgb = %s * color.rgb; \n", bt2100_rgb2lms); GLSL("color.rgb = mix(vec3(0.5) * sqrt(color.rgb), \n" " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f), \n" " %s(lessThan(vec3(1.0), color.rgb))); \n", HLG_A, HLG_B, HLG_C, sh_bvec(sh, 3)); break; case PL_COLOR_SYSTEM_DOLBYVISION: SH_FAIL(sh, "Cannot un-apply dolbyvision yet (no inverse reshaping)!"); return; case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_XYZ: case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_YCGCO: break; // no special pre-processing needed case PL_COLOR_SYSTEM_COUNT: pl_unreachable(); } // Since this is a relatively rare operation, bypass it as much as possible bool skip = true; skip &= PL_DEF(repr->sys, PL_COLOR_SYSTEM_RGB) == PL_COLOR_SYSTEM_RGB; skip &= PL_DEF(repr->levels, PL_COLOR_LEVELS_FULL) == PL_COLOR_LEVELS_FULL; skip &= !repr->bits.sample_depth || !repr->bits.color_depth || repr->bits.sample_depth == repr->bits.color_depth; skip &= !repr->bits.bit_shift; if (!skip) { struct pl_color_repr copy = *repr; ident_t xyzscale = NULL; if (repr->sys == PL_COLOR_SYSTEM_XYZ) xyzscale = SH_FLOAT(1.0 / pl_color_repr_normalize(&copy)); struct
pl_transform3x3 tr = pl_color_repr_decode(&copy, NULL); pl_transform3x3_invert(&tr); ident_t cmat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("cmat"), .data = PL_TRANSPOSE_3X3(tr.mat.m), }); ident_t cmat_c = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec3("cmat_c"), .data = tr.c, }); GLSL("color.rgb = %s * color.rgb + %s;\n", cmat, cmat_c); if (xyzscale) GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.6)) * vec3(%s); \n", xyzscale); } if (repr->alpha == PL_ALPHA_PREMULTIPLIED) GLSL("color.rgb *= vec3(color.a); \n"); GLSL("}\n"); } static ident_t sh_luma_coeffs(pl_shader sh, const struct pl_raw_primaries *prim) { struct pl_matrix3x3 rgb2xyz; rgb2xyz = pl_get_rgb2xyz_matrix(prim); // FIXME: Cannot use `const vec3` due to glslang bug #2025 ident_t coeffs = sh_fresh(sh, "luma_coeffs"); GLSLH("#define %s vec3(%s, %s, %s) \n", coeffs, SH_FLOAT(rgb2xyz.m[1][0]), // RGB->Y vector SH_FLOAT(rgb2xyz.m[1][1]), SH_FLOAT(rgb2xyz.m[1][2])); return coeffs; } void pl_shader_linearize(pl_shader sh, const struct pl_color_space *csp) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (csp->transfer == PL_COLOR_TRC_LINEAR) return; // Note that this clamp may technically violate the definition of // ITU-R BT.2100, which allows for sub-blacks and super-whites to be // displayed on the display where such would be possible. That said, the // problem is that not all gamma curves are well-defined on the values // outside this range, so we ignore it and just clamp anyway for sanity. GLSL("// pl_shader_linearize \n" "color.rgb = max(color.rgb, 0.0); \n"); float csp_min = csp->hdr.min_luma / PL_COLOR_SDR_WHITE; float csp_max = csp->hdr.max_luma / PL_COLOR_SDR_WHITE; csp_max = PL_DEF(csp_max, 1); switch (csp->transfer) { case PL_COLOR_TRC_SRGB: GLSL("color.rgb = mix(color.rgb * vec3(1.0/12.92), \n" " pow((color.rgb + vec3(0.055))/vec3(1.055), \n" " vec3(2.4)), \n" " %s(lessThan(vec3(0.04045), color.rgb))); \n", sh_bvec(sh, 3)); goto scale_out; case PL_COLOR_TRC_BT_1886: { const float lb = powf(csp_min, 1/2.4f); const float lw = powf(csp_max, 1/2.4f); const float a = powf(lw - lb, 2.4f); const float b = lb / (lw - lb); GLSL("color.rgb = %s * pow(color.rgb + vec3(%s), vec3(2.4)); \n", SH_FLOAT(a), SH_FLOAT(b)); return; } case PL_COLOR_TRC_GAMMA18: GLSL("color.rgb = pow(color.rgb, vec3(1.8));\n"); goto scale_out; case PL_COLOR_TRC_GAMMA20: GLSL("color.rgb = pow(color.rgb, vec3(2.0));\n"); goto scale_out; case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_GAMMA22: GLSL("color.rgb = pow(color.rgb, vec3(2.2));\n"); goto scale_out; case PL_COLOR_TRC_GAMMA24: GLSL("color.rgb = pow(color.rgb, vec3(2.4));\n"); goto scale_out; case PL_COLOR_TRC_GAMMA26: GLSL("color.rgb = pow(color.rgb, vec3(2.6));\n"); goto scale_out; case PL_COLOR_TRC_GAMMA28: GLSL("color.rgb = pow(color.rgb, vec3(2.8));\n"); goto scale_out; case PL_COLOR_TRC_PRO_PHOTO: GLSL("color.rgb = mix(color.rgb * vec3(1.0/16.0), \n" " pow(color.rgb, vec3(1.8)), \n" " %s(lessThan(vec3(0.03125), color.rgb))); \n", sh_bvec(sh, 3)); goto scale_out; case PL_COLOR_TRC_PQ: GLSL("color.rgb = pow(color.rgb, vec3(1.0/%f)); \n" "color.rgb = max(color.rgb - vec3(%f), 0.0) \n" " / (vec3(%f) - vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n" // PQ's output range is 0-10000, but we need it to be relative to // PL_COLOR_SDR_WHITE instead, so rescale "color.rgb *= vec3(%f); \n", PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / PL_COLOR_SDR_WHITE); return; case PL_COLOR_TRC_HLG: { const float y = fmaxf(1.2f + 0.42f * log10f(csp_max / HLG_REF), 1);
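// Here `y` is the HLG OOTF system gamma per ITU-R BT.2100: 1.2 at the reference
// peak (HLG_REF, the nominal 1000 cd/m^2 reference) and adjusted by
// 0.42 * log10() of the ratio between the actual and reference peak, clamped to
// at least 1.0; `b` below is the corresponding black level lift,
// beta = sqrt(3 * (Lb / Lw)^(1/y)).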
const float b = sqrtf(3 * powf(csp_min / csp_max, 1 / y)); // OETF^-1 GLSL("color.rgb = %s * color.rgb + vec3(%s); \n" "color.rgb = mix(vec3(4.0) * color.rgb * color.rgb, \n" " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " + vec3(%f), \n" " %s(lessThan(vec3(0.5), color.rgb))); \n", SH_FLOAT(1 - b), SH_FLOAT(b), HLG_C, HLG_A, HLG_B, sh_bvec(sh, 3)); // OOTF GLSL("color.rgb *= 1.0 / 12.0; \n" "color.rgb *= %s * pow(max(dot(%s, color.rgb), 0.0), %s); \n", SH_FLOAT(csp_max), sh_luma_coeffs(sh, pl_raw_primaries_get(csp->primaries)), SH_FLOAT(y - 1)); return; } case PL_COLOR_TRC_V_LOG: GLSL("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n" " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f), \n" " %s(lessThanEqual(vec3(0.181), color.rgb))); \n", VLOG_D, VLOG_C, VLOG_B, sh_bvec(sh, 3)); return; case PL_COLOR_TRC_S_LOG1: GLSL("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f); \n", SLOG_C, SLOG_A, SLOG_B); return; case PL_COLOR_TRC_S_LOG2: GLSL("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n" " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f)) * vec3(1.0/%f), \n" " %s(lessThanEqual(vec3(%f), color.rgb))); \n", SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, sh_bvec(sh, 3), SLOG_Q); return; case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); scale_out: if (csp_max != 1 || csp_min != 0) { GLSL("color.rgb = %s * color.rgb + vec3(%s); \n", SH_FLOAT(csp_max - csp_min), SH_FLOAT(csp_min)); } } void pl_shader_delinearize(pl_shader sh, const struct pl_color_space *csp) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (csp->transfer == PL_COLOR_TRC_LINEAR) return; GLSL("// pl_shader_delinearize \n"); float csp_min = csp->hdr.min_luma / PL_COLOR_SDR_WHITE; float csp_max = csp->hdr.max_luma / PL_COLOR_SDR_WHITE; csp_max = PL_DEF(csp_max, 1); switch (csp->transfer) { case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_SRGB: case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_GAMMA18: case PL_COLOR_TRC_GAMMA20: case PL_COLOR_TRC_GAMMA22: case PL_COLOR_TRC_GAMMA24: case PL_COLOR_TRC_GAMMA26: case PL_COLOR_TRC_GAMMA28: case PL_COLOR_TRC_PRO_PHOTO: ; if (csp_max != 1 || csp_min != 0) { GLSL("color.rgb = %s * color.rgb + vec3(%s); \n", SH_FLOAT(1 / (csp_max - csp_min)), SH_FLOAT(-csp_min / (csp_max - csp_min))); } break; case PL_COLOR_TRC_BT_1886: case PL_COLOR_TRC_PQ: case PL_COLOR_TRC_HLG: case PL_COLOR_TRC_V_LOG: case PL_COLOR_TRC_S_LOG1: case PL_COLOR_TRC_S_LOG2: break; // scene-referred or absolute scale case PL_COLOR_TRC_COUNT: pl_unreachable(); } GLSL("color.rgb = max(color.rgb, 0.0); \n"); switch (csp->transfer) { case PL_COLOR_TRC_SRGB: GLSL("color.rgb = mix(color.rgb * vec3(12.92), \n" " vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) \n" " - vec3(0.055), \n" " %s(lessThanEqual(vec3(0.0031308), color.rgb))); \n", sh_bvec(sh, 3)); return; case PL_COLOR_TRC_BT_1886: { const float lb = powf(csp_min, 1/2.4f); const float lw = powf(csp_max, 1/2.4f); const float a = powf(lw - lb, 2.4f); const float b = lb / (lw - lb); GLSL("color.rgb = pow(%s * color.rgb, vec3(1.0/2.4)) - vec3(%s); \n", SH_FLOAT(1.0 / a), SH_FLOAT(b)); return; } case PL_COLOR_TRC_GAMMA18: GLSL("color.rgb = pow(color.rgb, vec3(1.0/1.8));\n"); return; case PL_COLOR_TRC_GAMMA20: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.0));\n"); return; case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_GAMMA22: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.2));\n"); return; case PL_COLOR_TRC_GAMMA24: GLSL("color.rgb = pow(color.rgb, 
vec3(1.0/2.4));\n"); return; case PL_COLOR_TRC_GAMMA26: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.6));\n"); return; case PL_COLOR_TRC_GAMMA28: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.8));\n"); return; case PL_COLOR_TRC_PRO_PHOTO: GLSL("color.rgb = mix(color.rgb * vec3(16.0), \n" " pow(color.rgb, vec3(1.0/1.8)), \n" " %s(lessThanEqual(vec3(0.001953), color.rgb))); \n", sh_bvec(sh, 3)); return; case PL_COLOR_TRC_PQ: GLSL("color.rgb *= vec3(1.0/%f); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n" "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n", 10000 / PL_COLOR_SDR_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); return; case PL_COLOR_TRC_HLG: { const float y = fmaxf(1.2f + 0.42f * log10f(csp_max / HLG_REF), 1); const float b = sqrtf(3 * powf(csp_min / csp_max, 1 / y)); // OOTF^-1 GLSL("color.rgb *= 1.0 / %s; \n" "color.rgb *= 12.0 * max(1e-6, pow(dot(%s, color.rgb), %s)); \n", SH_FLOAT(csp_max), sh_luma_coeffs(sh, pl_raw_primaries_get(csp->primaries)), SH_FLOAT((1 - y) / y)); // OETF GLSL("color.rgb = mix(vec3(0.5) * sqrt(color.rgb), \n" " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f), \n" " %s(lessThan(vec3(1.0), color.rgb))); \n" "color.rgb = %s * color.rgb + vec3(%s); \n", HLG_A, HLG_B, HLG_C, sh_bvec(sh, 3), SH_FLOAT(1 / (1 - b)), SH_FLOAT(-b / (1 - b))); return; } case PL_COLOR_TRC_V_LOG: GLSL("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n" " vec3(%f) * log(color.rgb + vec3(%f)) \n" " + vec3(%f), \n" " %s(lessThanEqual(vec3(0.01), color.rgb))); \n", VLOG_C / M_LN10, VLOG_B, VLOG_D, sh_bvec(sh, 3)); return; case PL_COLOR_TRC_S_LOG1: GLSL("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n", SLOG_A / M_LN10, SLOG_B, SLOG_C); return; case PL_COLOR_TRC_S_LOG2: GLSL("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n" " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n" " + vec3(%f), \n" " %s(lessThanEqual(vec3(0.0), color.rgb))); \n", SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C, sh_bvec(sh, 3)); return; case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); } const struct pl_sigmoid_params pl_sigmoid_default_params = { PL_SIGMOID_DEFAULTS }; void pl_shader_sigmoidize(pl_shader sh, const struct pl_sigmoid_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; params = PL_DEF(params, &pl_sigmoid_default_params); float center = PL_DEF(params->center, 0.75); float slope = PL_DEF(params->slope, 6.5); // This function needs to go through (0,0) and (1,1), so we compute the // values at 1 and 0, and then scale/shift them, respectively. 
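// In other words, with sig(x) = 1 / (1 + exp(-slope * (x - center))), we take
// offset = sig(0) and scale = sig(1) - sig(0); sigmoidize then evaluates the
// normalized inverse sig^-1(color * scale + offset), and unsigmoidize evaluates
// the matching forward curve (sig(color) - offset) / scale, so both map
// 0 -> 0 and 1 -> 1 exactly.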
float offset = 1.0 / (1 + expf(slope * center)); float scale = 1.0 / (1 + expf(slope * (center - 1))) - offset; GLSL("// pl_shader_sigmoidize \n" "color = clamp(color, 0.0, 1.0); \n" "color = vec4(%s) - log(vec4(1.0) / (color * vec4(%s) + vec4(%s)) \n" " - vec4(1.0)) * vec4(%s); \n", SH_FLOAT(center), SH_FLOAT(scale), SH_FLOAT(offset), SH_FLOAT(1.0 / slope)); } void pl_shader_unsigmoidize(pl_shader sh, const struct pl_sigmoid_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; // See: pl_shader_sigmoidize params = PL_DEF(params, &pl_sigmoid_default_params); float center = PL_DEF(params->center, 0.75); float slope = PL_DEF(params->slope, 6.5); float offset = 1.0 / (1 + expf(slope * center)); float scale = 1.0 / (1 + expf(slope * (center - 1))) - offset; GLSL("// pl_shader_unsigmoidize \n" "color = clamp(color, 0.0, 1.0); \n" "color = vec4(%s) / (vec4(1.0) + exp(vec4(%s) * (vec4(%s) - color))) \n" " - vec4(%s); \n", SH_FLOAT(1.0 / scale), SH_FLOAT(slope), SH_FLOAT(center), SH_FLOAT(offset / scale)); } const struct pl_peak_detect_params pl_peak_detect_default_params = { PL_PEAK_DETECT_DEFAULTS }; struct sh_tone_map_obj { struct pl_tone_map_params params; pl_shader_obj lut; // Peak detection state pl_buf peak_buf; struct pl_shader_desc desc; float margin; }; static void sh_tone_map_uninit(pl_gpu gpu, void *ptr) { struct sh_tone_map_obj *obj = ptr; pl_shader_obj_destroy(&obj->lut); pl_buf_destroy(gpu, &obj->peak_buf); memset(obj, 0, sizeof(*obj)); } static inline float iir_coeff(float rate) { float a = 1.0 - cos(1.0 / rate); return sqrt(a*a + 2*a) - a; } bool pl_shader_detect_peak(pl_shader sh, struct pl_color_space csp, pl_shader_obj *state, const struct pl_peak_detect_params *params) { params = PL_DEF(params, &pl_peak_detect_default_params); if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return false; if (!sh_try_compute(sh, 8, 8, true, 2 * sizeof(int32_t))) { PL_ERR(sh, "HDR peak detection requires compute shaders!"); return false; } if (sh_glsl(sh).version < 130) { // uint was added in GLSL 130 PL_ERR(sh, "HDR peak detection requires GLSL >= 130!"); return false; } struct sh_tone_map_obj *obj; obj = SH_OBJ(sh, state, PL_SHADER_OBJ_TONE_MAP, struct sh_tone_map_obj, sh_tone_map_uninit); if (!obj) return false; pl_gpu gpu = SH_GPU(sh); obj->margin = params->overshoot_margin; if (!obj->peak_buf) { obj->desc = (struct pl_shader_desc) { .desc = { .name = "PeakDetect", .type = PL_DESC_BUF_STORAGE, }, }; // Note: Don't change this order, `vec2 average` being the first // element is hard-coded in `pl_get_detected_peak` bool ok = true; ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_vec2("average")); ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_int("frame_sum")); ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_int("frame_max")); ok &= sh_buf_desc_append(obj, gpu, &obj->desc, NULL, pl_var_uint("counter")); if (!ok) { PL_ERR(sh, "HDR peak detection exhausts device limits!"); return false; } // Create the SSBO size_t size = sh_buf_desc_size(&obj->desc); static const uint8_t zero[32] = {0}; pl_assert(sizeof(zero) >= size); struct pl_buf_params buf_params = { .size = size, .host_readable = true, .memory_type = PL_BUF_MEM_DEVICE, .storable = true, .initial_data = zero, .debug_tag = PL_DEBUG_TAG, }; // Attempt creating host-readable SSBO first, suppress errors pl_log_level_cap(gpu->log, PL_LOG_DEBUG); obj->peak_buf = pl_buf_create(gpu, &buf_params); pl_log_level_cap(gpu->log, PL_LOG_NONE); if (!obj->peak_buf) { // Fall back to non-host-readable 
SSBO buf_params.host_readable = false; obj->peak_buf = pl_buf_create(gpu, &buf_params); } obj->desc.binding.object = obj->peak_buf; } if (!obj->peak_buf) { SH_FAIL(sh, "Failed creating peak detection SSBO!"); return false; } // Attach the SSBO and perform the peak detection logic obj->desc.desc.access = PL_DESC_ACCESS_READWRITE; obj->desc.memory = PL_MEMORY_COHERENT; sh_desc(sh, obj->desc); sh_describe(sh, "peak detection"); GLSL("// pl_shader_detect_peak \n" "{ \n" "vec4 color_orig = color; \n"); // Decode the color into linear light absolute scale representation pl_color_space_infer(&csp); pl_shader_linearize(sh, &csp); // For performance, we want to do as few atomic operations on global // memory as possible, so use an atomic in shmem for the work group. ident_t wg_sum = sh_fresh(sh, "wg_sum"), wg_max = sh_fresh(sh, "wg_max"); GLSLH("shared int %s; \n", wg_sum); GLSLH("shared int %s; \n", wg_max); GLSL("%s = 0; %s = 0; \n" "barrier(); \n", wg_sum, wg_max); // Chosen to avoid overflowing on an 8K buffer const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0; GLSL("float sig_max = max(max(color.r, color.g), color.b); \n" "float sig_log = log(max(sig_max, %f)); \n" "int isig_max = int(sig_max * %f); \n" "int isig_log = int(sig_log * %f); \n", log_min, sig_scale, log_scale); // Update the work group's shared atomics if (sh_glsl(sh).subgroup_size) { GLSL("int group_max = subgroupMax(isig_max); \n" "int group_sum = subgroupAdd(isig_log); \n" "if (subgroupElect()) { \n" " atomicMax(%s, group_max); \n" " atomicAdd(%s, group_sum); \n" "} \n" "barrier(); \n", wg_max, wg_sum); } else { GLSL("atomicMax(%s, isig_max); \n" "atomicAdd(%s, isig_log); \n" "barrier(); \n", wg_max, wg_sum); } GLSL("color = color_orig; \n" "} \n"); // Have one thread per work group update the global atomics. Do this // at the end of the shader to avoid clobbering `average`, in case the // state object will be used by the same pass. 
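// Taken together, the peak detection is a three-stage reduction: each
// invocation folds its sample into the shared wg_sum/wg_max atomics above, one
// invocation per work group then merges the group's average and maximum into
// the global frame_sum/frame_max SSBO fields below, and the last work group to
// finish (tracked via `counter`) converts the fixed-point totals back to float
// and folds them into the smoothed `average`.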
GLSLF("// pl_shader_detect_peak \n" "if (gl_LocalInvocationIndex == 0u) { \n" " int wg_avg = %s / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y); \n" " atomicAdd(frame_sum, wg_avg); \n" " atomicMax(frame_max, %s); \n" " memoryBarrierBuffer(); \n" "} \n" "barrier(); \n", wg_sum, wg_max); // Finally, to update the global state per dispatch, we increment a counter GLSLF("if (gl_LocalInvocationIndex == 0u) { \n" " uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y; \n" " if (atomicAdd(counter, 1u) == num_wg - 1u) { \n" " vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);\n" " cur *= vec2(1.0 / %f, 1.0 / %f); \n" " cur.x = exp(cur.x); \n" " cur.y = max(cur.y, %s); \n", log_scale, sig_scale, SH_FLOAT(PL_DEF(params->minimum_peak, 1.0))); // Set the initial value accordingly if it contains no data GLSLF(" if (average.y == 0.0) \n" " average = cur; \n"); // Use an IIR low-pass filter to smooth out the detected values GLSLF(" average += %s * (cur - average); \n", SH_FLOAT(iir_coeff(PL_DEF(params->smoothing_period, 100.0)))); // Scene change hysteresis float log_db = 10.0 / log(10.0); if (params->scene_threshold_low > 0 && params->scene_threshold_high > 0) { GLSLF(" float delta = abs(log(cur.x / average.x)); \n" " average = mix(average, cur, smoothstep(%s, %s, delta)); \n", SH_FLOAT(params->scene_threshold_low / log_db), SH_FLOAT(params->scene_threshold_high / log_db)); } // Reset SSBO state for the next frame GLSLF(" frame_sum = 0; \n" " frame_max = 0; \n" " counter = 0u; \n" " memoryBarrierBuffer(); \n" " } \n" "} \n"); return true; } bool pl_get_detected_peak(const pl_shader_obj state, float *out_peak, float *out_avg) { if (!state || state->type != PL_SHADER_OBJ_TONE_MAP) return false; struct sh_tone_map_obj *obj = state->priv; pl_gpu gpu = state->gpu; pl_buf buf = obj->peak_buf; if (!buf) return false; float average[2] = {0}; pl_assert(obj->peak_buf->params.size >= sizeof(average)); if (buf->params.host_readable) { // We can read directly from the SSBO if (!pl_buf_read(gpu, buf, 0, average, sizeof(average))) { PL_ERR(gpu, "Failed reading from peak detect state buffer"); return false; } } else { // We can't read directly from the SSBO, go via an intermediary pl_buf tmp = pl_buf_create(gpu, pl_buf_params( .size = sizeof(average), .host_readable = true, )); if (!tmp) { PL_ERR(gpu, "Failed creating buffer for SSBO read-back"); return false; } pl_buf_copy(gpu, tmp, 0, buf, 0, sizeof(average)); if (!pl_buf_read(gpu, tmp, 0, average, sizeof(average))) { PL_ERR(gpu, "Failed reading from SSBO read-back buffer"); pl_buf_destroy(gpu, &tmp); return false; } pl_buf_destroy(gpu, &tmp); } *out_avg = average[0]; *out_peak = average[1]; if (obj->margin > 0.0) { *out_peak *= 1.0 + obj->margin; *out_peak = PL_MIN(*out_peak, 10000 / PL_COLOR_SDR_WHITE); } return true; } void pl_reset_detected_peak(pl_shader_obj state) { if (!state || state->type != PL_SHADER_OBJ_TONE_MAP) return; struct sh_tone_map_obj *obj = state->priv; pl_buf_destroy(state->gpu, &obj->peak_buf); } const struct pl_color_map_params pl_color_map_default_params = { PL_COLOR_MAP_DEFAULTS }; // Get the LUT range for the dynamic tone mapping LUT static void dynamic_lut_range(float *idx_min, float *idx_max, const struct pl_tone_map_params *params) { float max_peak = params->input_max; float min_peak = pl_hdr_rescale(params->output_scaling, params->input_scaling, params->output_max); // Add some headroom to avoid no-op tone mapping. 
(This is because // many curves are not good approximations of a no-op tone mapping // function even when tone mapping to very similar values) *idx_min = PL_MIX(min_peak, max_peak, 0.05f); *idx_max = max_peak; } static void fill_lut(void *data, const struct sh_lut_params *params) { struct pl_tone_map_params *lut_params = params->priv; assert(lut_params->lut_size == params->width); float *lut = data; if (params->height) { // Dynamic tone-mapping, generate a LUT curve for each possible peak float idx_min, idx_max; dynamic_lut_range(&idx_min, &idx_max, lut_params); for (int i = 0; i < params->height; i++) { float x = (float) i / (params->height - 1); lut_params->input_max = PL_MIX(idx_min, idx_max, x); pl_tone_map_generate(lut, lut_params); lut += params->width; } lut_params->input_max = idx_max; // sanity } else { // Static tone-mapping, generate only a single curve pl_tone_map_generate(lut, lut_params); } } static void tone_map(pl_shader sh, const struct pl_color_space *src, const struct pl_color_space *dst, pl_shader_obj *state, const struct pl_color_map_params *params) { float src_min = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_NORM, src->hdr.min_luma), src_max = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_NORM, src->hdr.max_luma), dst_min = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_NORM, dst->hdr.min_luma), dst_max = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_NORM, dst->hdr.max_luma); // Some tone mapping functions don't handle values of absolute 0 very well, // so clip the minimums to a very small positive value src_min = PL_MAX(src_min, 1e-7); dst_min = PL_MAX(dst_min, 1e-7); if (!params->inverse_tone_mapping) { // Never exceed the source unless requested, but still allow // black point adaptation dst_max = PL_MIN(dst_max, src_max); } // Round sufficiently similar values if (fabs(src_max - dst_max) < 1e-6) dst_max = src_max; if (fabs(src_min - dst_min) < 1e-6) dst_min = src_min; struct pl_tone_map_params lut_params = { .function = params->tone_mapping_function, .param = params->tone_mapping_param, .input_scaling = PL_HDR_SQRT, .output_scaling = PL_HDR_NORM, .lut_size = PL_DEF(params->lut_size, pl_color_map_default_params.lut_size), .input_min = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_SQRT, src_min), .input_max = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_SQRT, src_max), .output_min = dst_min, .output_max = dst_max, }; enum pl_tone_map_mode mode = params->tone_mapping_mode; if (params->tone_mapping_algo) { // Backwards compatibility static const struct pl_tone_map_function * funcs[PL_TONE_MAPPING_ALGORITHM_COUNT] = { [PL_TONE_MAPPING_CLIP] = &pl_tone_map_clip, [PL_TONE_MAPPING_MOBIUS] = &pl_tone_map_mobius, [PL_TONE_MAPPING_REINHARD] = &pl_tone_map_reinhard, [PL_TONE_MAPPING_HABLE] = &pl_tone_map_hable, [PL_TONE_MAPPING_GAMMA] = &pl_tone_map_gamma, [PL_TONE_MAPPING_LINEAR] = &pl_tone_map_linear, [PL_TONE_MAPPING_BT_2390] = &pl_tone_map_bt2390, }; lut_params.function = funcs[params->tone_mapping_algo]; // Backwards compatibility with older API, explicitly default the tone // mapping mode based on the previous values of desat_str etc. 
if (params->desaturation_strength == 1 && params->desaturation_exponent == 0) { mode = PL_DEF(mode, PL_TONE_MAP_RGB); } else if (params->desaturation_strength > 0) { mode = PL_DEF(mode, PL_TONE_MAP_HYBRID); } else { mode = PL_DEF(mode, PL_TONE_MAP_LUMA); } } if (pl_tone_map_params_noop(&lut_params)) return; sh_describe(sh, "tone mapping"); const struct pl_tone_map_function *fun = lut_params.function; struct sh_tone_map_obj *obj = NULL; ident_t lut = NULL; bool can_fixed = !params->force_tone_mapping_lut; bool is_noop = can_fixed && (!fun || fun == &pl_tone_map_clip); bool pure_bpc = can_fixed && src_max == dst_max; bool dynamic_peak = false; if (state && !(is_noop || pure_bpc)) { obj = SH_OBJ(sh, state, PL_SHADER_OBJ_TONE_MAP, struct sh_tone_map_obj, sh_tone_map_uninit); if (!obj) return; // Only use dynamic peak detection for range reductions dynamic_peak = obj->peak_buf && src_max > dst_max; lut = sh_lut(sh, sh_lut_params( .object = &obj->lut, .method = SH_LUT_AUTO, .type = PL_VAR_FLOAT, .width = lut_params.lut_size, .height = dynamic_peak ? lut_params.lut_size : 0, .comps = 1, .linear = true, .update = !pl_tone_map_params_equal(&lut_params, &obj->params), .fill = fill_lut, .priv = &lut_params, )); if (!lut) is_noop = true; obj->params = lut_params; } // Hard-clamp the input values to the claimed input peak. Do this // per-channel to fix issues with excessively oversaturated highlights in // broken files that contain values outside their stated brightness range. GLSL("color.rgb = clamp(color.rgb, %s, %s); \n", SH_FLOAT(src_min), SH_FLOAT(src_max)); if (is_noop) { GLSL("#define tone_map(x) clamp((x), %s, %s) \n", SH_FLOAT(dst_min), SH_FLOAT(dst_max)); } else if (pure_bpc) { // Pure black point compensation const float scale = (dst_max - dst_min) / (src_max - src_min); GLSL("#define tone_map(x) (%s * (x) + %s) \n", SH_FLOAT(scale), SH_FLOAT(dst_min - scale * src_min)); } else if (dynamic_peak) { // Dynamic 2D LUT obj->desc.desc.access = PL_DESC_ACCESS_READONLY; obj->desc.memory = 0; sh_desc(sh, obj->desc); float idx_min, idx_max; dynamic_lut_range(&idx_min, &idx_max, &lut_params); GLSL("const float idx_min = %s; \n" "const float idx_max = %s; \n" "float input_max = idx_max; \n" "if (average.y != 0.0) { \n" " float sig_peak = average.y; \n", SH_FLOAT(idx_min), SH_FLOAT(idx_max)); // Allow a tiny bit of extra overshoot for the smoothed peak if (obj->margin > 0) GLSL("sig_peak *= %s; \n", SH_FLOAT(obj->margin + 1)); GLSL(" input_max = clamp(sqrt(sig_peak), idx_min, idx_max); \n" "} \n"); // Sample the 2D LUT from a position determined by the detected max GLSL("const float input_min = %s; \n" "float scale = 1.0 / (input_max - input_min); \n" "float curve = (input_max - idx_min) / (idx_max - idx_min); \n" "float base = -input_min * scale; \n" "#define tone_map(x) (%s(vec2(scale * sqrt(x) + base, curve))) \n", SH_FLOAT(lut_params.input_min), lut); } else { // Regular 1D LUT const float lut_range = lut_params.input_max - lut_params.input_min; GLSL("#define tone_map(x) (%s(%s * sqrt(x) + %s)) \n", lut, SH_FLOAT(1.0f / lut_range), SH_FLOAT(-lut_params.input_min / lut_range)); } if (mode == PL_TONE_MAP_AUTO) { if (is_noop || pure_bpc || src_max == dst_max) { // No-op, clip, pure BPC, etc. 
- do this per-channel mode = PL_TONE_MAP_RGB; } else if (src_max / dst_max > 10) { // Extreme reduction: Pick hybrid to avoid blowing out highlights mode = PL_TONE_MAP_HYBRID; } else { mode = PL_TONE_MAP_LUMA; } } ident_t ct = SH_FLOAT(params->tone_mapping_crosstalk); GLSL("const float ct_scale = 1.0 - 3.0 * %s; \n" "float ct = %s * (color.r + color.g + color.b); \n" "color.rgb = ct_scale * color.rgb + vec3(ct); \n", ct, ct); switch (mode) { case PL_TONE_MAP_RGB: for (int c = 0; c < 3; c++) GLSL("color[%d] = tone_map(color[%d]); \n", c, c); break; case PL_TONE_MAP_MAX: GLSL("float sig_max = max(max(color.r, color.g), color.b); \n" "color.rgb *= tone_map(sig_max) / max(sig_max, %s); \n", SH_FLOAT(dst_min)); break; case PL_TONE_MAP_LUMA: case PL_TONE_MAP_HYBRID: { const struct pl_raw_primaries *prim = pl_raw_primaries_get(src->primaries); struct pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(prim); // Normalize X and Z by the white point for (int i = 0; i < 3; i++) { rgb2xyz.m[0][i] /= pl_cie_X(prim->white); rgb2xyz.m[2][i] /= pl_cie_Z(prim->white); rgb2xyz.m[0][i] -= rgb2xyz.m[1][i]; rgb2xyz.m[2][i] -= rgb2xyz.m[1][i]; } GLSL("vec3 xyz = %s * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("rgb2xyz"), .data = PL_TRANSPOSE_3X3(rgb2xyz.m), })); // Tuned to meet the desired desaturation at 1000 -> SDR float desat = dst_max > src_max ? 1.075f : 1.1f; float exponent = logf(desat) / logf(1000 / PL_COLOR_SDR_WHITE); GLSL("float orig = max(xyz.y, %s); \n" "xyz.y = tone_map(xyz.y); \n" "xyz.xz *= pow(xyz.y / orig, %s) * xyz.y / orig; \n", SH_FLOAT(dst_min), SH_FLOAT(exponent)); // Extra luminance correction when reducing dynamic range if (src_max > dst_max) GLSL("xyz.y -= max(0.1 * xyz.x, 0.0); \n"); pl_matrix3x3_invert(&rgb2xyz); GLSL("vec3 color_lin = %s * xyz; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("xyz2rgb"), .data = PL_TRANSPOSE_3X3(rgb2xyz.m), })); if (mode == PL_TONE_MAP_HYBRID) { for (int c = 0; c < 3; c++) GLSL("color[%d] = tone_map(color[%d]); \n", c, c); // coeff(x) = max(a * x^-y, b * x^y) // solve for coeff(dst_min) = 1, coeff(dst_max) = 1 const float y = 2.4f; const float a = powf(dst_min, y); const float b = powf(dst_max, -y); GLSL("float coeff = pow(xyz.y, %f); \n" "coeff = max(%s / coeff, %s * coeff); \n" "color.rgb = mix(color_lin, color.rgb, coeff); \n", y, SH_FLOAT(a), SH_FLOAT(b)); } else { GLSL("color.rgb = color_lin; \n"); } break; } case PL_TONE_MAP_AUTO: case PL_TONE_MAP_MODE_COUNT: pl_unreachable(); } // Inverse crosstalk GLSL("ct = %s * (color.r + color.g + color.b); \n" "color.rgb = (color.rgb - vec3(ct)) / ct_scale; \n", ct); GLSL("#undef tone_map \n"); } static inline bool is_identity_mat(const struct pl_matrix3x3 *mat) { float delta = 0; for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { const float x = mat->m[i][j]; delta += fabsf((i == j) ? 
(x - 1) : x); } } return delta < 1e-5f; } static void adapt_colors(pl_shader sh, const struct pl_color_space *src, const struct pl_color_space *dst, const struct pl_color_map_params *params) { bool need_reduction = pl_primaries_superset(&src->hdr.prim, &dst->hdr.prim); bool need_conversion = src->primaries != dst->primaries; if (!need_reduction && !need_conversion) return; // Main gamut adaptation matrix, respecting the desired intent const struct pl_matrix3x3 ref2ref = pl_get_color_mapping_matrix(&src->hdr.prim, &dst->hdr.prim, params->intent); // Normalize colors to range [0-1] float lb = dst->hdr.min_luma / PL_COLOR_SDR_WHITE; float lw = dst->hdr.max_luma / PL_COLOR_SDR_WHITE; GLSL("color.rgb = %s * color.rgb + %s; \n", SH_FLOAT(1 / (lw - lb)), SH_FLOAT(-lb / (lw - lb))); // Convert the input colors to be represented relative to the target // display's mastering primaries. struct pl_matrix3x3 mat; mat = pl_get_color_mapping_matrix(pl_raw_primaries_get(src->primaries), &src->hdr.prim, PL_INTENT_RELATIVE_COLORIMETRIC); pl_matrix3x3_rmul(&ref2ref, &mat); if (!is_identity_mat(&mat)) { GLSL("color.rgb = %s * color.rgb;\n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("src2ref"), .data = PL_TRANSPOSE_3X3(mat.m), })); } enum pl_gamut_mode mode = params->gamut_mode; if (params->gamut_warning) mode = PL_GAMUT_WARN; if (params->gamut_clipping) mode = PL_GAMUT_DESATURATE; if (!need_reduction) mode = PL_GAMUT_CLIP; switch (mode) { case PL_GAMUT_CLIP: GLSL("color.rgb = clamp(color.rgb, 0.0, 1.0); \n"); break; case PL_GAMUT_WARN: GLSL("if (any(lessThan(color.rgb, vec3(-0.005))) || \n" " any(greaterThan(color.rgb, vec3(1.005)))) \n" " color.rgb = vec3(1.0, 0.0, 1.0); // magenta \n"); break; case PL_GAMUT_DARKEN: { float cmax = 1; for (int i = 0; i < 3; i++) cmax = PL_MAX(cmax, ref2ref.m[i][i]); GLSL("color.rgb *= %s; \n", SH_FLOAT(1 / cmax)); break; } case PL_GAMUT_DESATURATE: GLSL("float cmin = min(min(color.r, color.g), color.b); \n" "float luma = clamp(dot(%s, color.rgb), 0.0, 1.0); \n" "if (cmin < 0.0 - 1e-6) \n" " color.rgb = mix(color.rgb, vec3(luma), \n" " -cmin / (luma - cmin)); \n" "float cmax = max(max(color.r, color.g), color.b); \n" "if (cmax > 1.0 + 1e-6) \n" " color.rgb = mix(color.rgb, vec3(luma), \n" " (1.0 - cmax) / (luma - cmax));\n", sh_luma_coeffs(sh, &dst->hdr.prim)); break; case PL_GAMUT_MODE_COUNT: pl_unreachable(); } // Transform the colors from the destination mastering primaries to the // destination nominal primaries mat = pl_get_color_mapping_matrix(&dst->hdr.prim, pl_raw_primaries_get(dst->primaries), PL_INTENT_RELATIVE_COLORIMETRIC); if (!is_identity_mat(&mat)) { GLSL("color.rgb = %s * color.rgb;\n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("ref2dst"), .data = PL_TRANSPOSE_3X3(mat.m), })); } // Undo normalization GLSL("color.rgb = %s * color.rgb + %s; \n", SH_FLOAT(lw - lb), SH_FLOAT(lb)); } void pl_shader_color_map(pl_shader sh, const struct pl_color_map_params *params, struct pl_color_space src, struct pl_color_space dst, pl_shader_obj *tone_map_state, bool prelinearized) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; pl_color_space_infer(&src); pl_color_space_infer_ref(&dst, &src); if (pl_color_space_equal(&src, &dst)) { if (prelinearized) pl_shader_delinearize(sh, &dst); return; } sh_describe(sh, "colorspace conversion"); GLSL("// pl_shader_color_map\n"); GLSL("{\n"); params = PL_DEF(params, &pl_color_map_default_params); if (!prelinearized) pl_shader_linearize(sh, &src); tone_map(sh, &src, &dst, tone_map_state, params); 
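// Note the fixed order of operations in this pass: linearize the input (unless
// the caller already did), tone-map to the destination dynamic range, then
// adapt_colors() below handles gamut mapping towards the destination primaries
// before the result is re-encoded with the destination transfer function.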
adapt_colors(sh, &src, &dst, params); pl_shader_delinearize(sh, &dst); GLSL("}\n"); } void pl_shader_cone_distort(pl_shader sh, struct pl_color_space csp, const struct pl_cone_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (!params || !params->cones) return; sh_describe(sh, "cone distortion"); GLSL("// pl_shader_cone_distort\n"); GLSL("{\n"); pl_color_space_infer(&csp); pl_shader_linearize(sh, &csp); struct pl_matrix3x3 cone_mat; cone_mat = pl_get_cone_matrix(params, pl_raw_primaries_get(csp.primaries)); GLSL("color.rgb = %s * color.rgb;\n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("cone_mat"), .data = PL_TRANSPOSE_3X3(cone_mat.m), })); pl_shader_delinearize(sh, &csp); GLSL("}\n"); } struct sh_dither_obj { enum pl_dither_method method; pl_shader_obj lut; }; static void sh_dither_uninit(pl_gpu gpu, void *ptr) { struct sh_dither_obj *obj = ptr; pl_shader_obj_destroy(&obj->lut); *obj = (struct sh_dither_obj) {0}; } static void fill_dither_matrix(void *data, const struct sh_lut_params *params) { pl_assert(params->width > 0 && params->height > 0 && params->comps == 1); const struct sh_dither_obj *obj = params->priv; switch (obj->method) { case PL_DITHER_ORDERED_LUT: pl_assert(params->width == params->height); pl_generate_bayer_matrix(data, params->width); return; case PL_DITHER_BLUE_NOISE: pl_assert(params->width == params->height); pl_generate_blue_noise(data, params->width); return; case PL_DITHER_ORDERED_FIXED: case PL_DITHER_WHITE_NOISE: case PL_DITHER_METHOD_COUNT: return; } pl_unreachable(); } static bool dither_method_is_lut(enum pl_dither_method method) { switch (method) { case PL_DITHER_BLUE_NOISE: case PL_DITHER_ORDERED_LUT: return true; case PL_DITHER_ORDERED_FIXED: case PL_DITHER_WHITE_NOISE: return false; case PL_DITHER_METHOD_COUNT: break; } pl_unreachable(); } void pl_shader_dither(pl_shader sh, int new_depth, pl_shader_obj *dither_state, const struct pl_dither_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (new_depth <= 0 || new_depth > 256) { PL_WARN(sh, "Invalid dither depth: %d.. ignoring", new_depth); return; } sh_describe(sh, "dithering"); GLSL("// pl_shader_dither \n" "{ \n" "float bias; \n"); params = PL_DEF(params, &pl_dither_default_params); if (params->lut_size < 0 || params->lut_size > 8) { SH_FAIL(sh, "Invalid `lut_size` specified: %d", params->lut_size); return; } enum pl_dither_method method = params->method; bool can_fixed = sh_glsl(sh).version >= 130; ident_t lut = NULL; int lut_size = 0; if (method == PL_DITHER_ORDERED_FIXED && !can_fixed) { PL_WARN(sh, "PL_DITHER_ORDERED_FIXED requires glsl version >= 130.." " falling back."); goto fallback; } if (dither_method_is_lut(method)) { if (!dither_state) { PL_WARN(sh, "LUT-based dither method specified but no dither state " "object given, falling back to non-LUT based methods."); goto fallback; } struct sh_dither_obj *obj; obj = SH_OBJ(sh, dither_state, PL_SHADER_OBJ_DITHER, struct sh_dither_obj, sh_dither_uninit); if (!obj) goto fallback; bool changed = obj->method != method; obj->method = method; lut_size = 1 << PL_DEF(params->lut_size, 6); lut = sh_lut(sh, sh_lut_params( .object = &obj->lut, .type = PL_VAR_FLOAT, .width = lut_size, .height = lut_size, .comps = 1, .update = changed, .fill = fill_dither_matrix, .priv = obj, )); if (!lut) goto fallback; } goto done; fallback: method = can_fixed ? 
PL_DITHER_ORDERED_FIXED : PL_DITHER_WHITE_NOISE; // fall through done: ; int size = 0; if (lut) { size = lut_size; } else if (method == PL_DITHER_ORDERED_FIXED) { size = 16; // hard-coded size } if (size) { // Transform the screen position to the cyclic range [0,1) GLSL("vec2 pos = fract(gl_FragCoord.xy * 1.0/%s);\n", SH_FLOAT(size)); if (params->temporal) { int phase = SH_PARAMS(sh).index % 8; float r = phase * (M_PI / 2); // rotate float m = phase < 4 ? 1 : -1; // mirror float mat[2][2] = { {cos(r), -sin(r) }, {sin(r) * m, cos(r) * m}, }; ident_t rot = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat2("dither_rot"), .data = &mat[0][0], .dynamic = true, }); GLSL("pos = fract(%s * pos + vec2(1.0));\n", rot); } } switch (method) { case PL_DITHER_WHITE_NOISE: { ident_t prng = sh_prng(sh, params->temporal, NULL); GLSL("bias = %s.x;\n", prng); break; } case PL_DITHER_ORDERED_FIXED: // Bitwise ordered dither using only 32-bit uints GLSL("uvec2 xy = uvec2(pos * 16.0) %% 16u; \n" // Bitwise merge (morton number) "xy.x = xy.x ^ xy.y; \n" "xy = (xy | xy << 2) & uvec2(0x33333333); \n" "xy = (xy | xy << 1) & uvec2(0x55555555); \n" // Bitwise inversion "uint b = xy.x + (xy.y << 1); \n" "b = (b * 0x0802u & 0x22110u) | \n" " (b * 0x8020u & 0x88440u); \n" "b = 0x10101u * b; \n" "b = (b >> 16) & 0xFFu; \n" // Generate bias value "bias = float(b) * 1.0/256.0; \n"); break; case PL_DITHER_BLUE_NOISE: case PL_DITHER_ORDERED_LUT: pl_assert(lut); GLSL("bias = %s(ivec2(pos * %s));\n", lut, SH_FLOAT(lut_size)); break; case PL_DITHER_METHOD_COUNT: pl_unreachable(); } uint64_t scale = (1LLU << new_depth) - 1; GLSL("color = vec4(%llu.0) * color + vec4(bias); \n" "color = floor(color) * vec4(1.0 / %llu.0); \n" "} \n", (long long unsigned) scale, (long long unsigned) scale); } const struct pl_dither_params pl_dither_default_params = { PL_DITHER_DEFAULTS }; libplacebo-v4.192.1/src/shaders/custom.c000066400000000000000000001417001417677245700200770ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "gpu.h" #include "shaders.h" bool pl_shader_custom(pl_shader sh, const struct pl_custom_shader *params) { if (params->compute) { int bw = PL_DEF(params->compute_group_size[0], 16); int bh = PL_DEF(params->compute_group_size[1], 16); bool flex = !params->compute_group_size[0] || !params->compute_group_size[1]; if (!sh_try_compute(sh, bw, bh, flex, params->compute_shmem)) return false; } if (!sh_require(sh, params->input, params->output_w, params->output_h)) return false; sh->res.output = params->output; // Attach the variables, descriptors etc. directly instead of going via // `sh_var` / `sh_desc` etc. 
to avoid generating fresh names for (int i = 0; i < params->num_variables; i++) { struct pl_shader_var sv = params->variables[i]; sv.data = pl_memdup(SH_TMP(sh), sv.data, pl_var_host_layout(0, &sv.var).size); sv.var.name = pl_strdup0(SH_TMP(sh), pl_str0(sv.var.name)); PL_ARRAY_APPEND(sh, sh->vars, sv); } for (int i = 0; i < params->num_descriptors; i++) { struct pl_shader_desc sd = params->descriptors[i]; size_t bsize = sizeof(sd.buffer_vars[0]) * sd.num_buffer_vars; if (bsize) sd.buffer_vars = pl_memdup(SH_TMP(sh), sd.buffer_vars, bsize); sd.desc.name = pl_strdup0(SH_TMP(sh), pl_str0(sd.desc.name)); PL_ARRAY_APPEND(sh, sh->descs, sd); } for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_shader_va sva = params->vertex_attribs[i]; size_t vsize = sva.attr.fmt->texel_size; for (int n = 0; n < PL_ARRAY_SIZE(sva.data); n++) sva.data[n] = pl_memdup(SH_TMP(sh), sva.data[n], vsize); sva.attr.name = pl_strdup0(SH_TMP(sh), pl_str0(sva.attr.name)); PL_ARRAY_APPEND(sh, sh->vas, sva); } for (int i = 0; i < params->num_constants; i++) { struct pl_shader_const sc = params->constants[i]; size_t csize = pl_var_type_size(sc.type); sc.data = pl_memdup(SH_TMP(sh), sc.data, csize); sc.name = pl_strdup0(SH_TMP(sh), pl_str0(sc.name)); PL_ARRAY_APPEND(sh, sh->consts, sc); } if (params->prelude) GLSLP("// pl_shader_custom prelude: \n%s\n", params->prelude); if (params->header) GLSLH("// pl_shader_custom header: \n%s\n", params->header); if (params->description) sh_describe(sh, pl_strdup0(SH_TMP(sh), pl_str0(params->description))); if (params->body) { const char *output_decl = ""; if (params->output != params->input) { switch (params->output) { case PL_SHADER_SIG_NONE: break; case PL_SHADER_SIG_COLOR: output_decl = "vec4 color = vec4(0.0);"; break; case PL_SHADER_SIG_SAMPLER: pl_unreachable(); } } GLSL("// pl_shader_custom \n" "%s \n" "{ \n" "%s \n" "} \n", output_decl, params->body); } return true; } // Hard-coded size limits, mainly for convenience (to avoid dynamic memory) #define SHADER_MAX_HOOKS 16 #define SHADER_MAX_BINDS 16 #define MAX_SZEXP_SIZE 32 enum szexp_op { SZEXP_OP_ADD, SZEXP_OP_SUB, SZEXP_OP_MUL, SZEXP_OP_DIV, SZEXP_OP_NOT, SZEXP_OP_GT, SZEXP_OP_LT, }; enum szexp_tag { SZEXP_END = 0, // End of an RPN expression SZEXP_CONST, // Push a constant value onto the stack SZEXP_VAR_W, // Get the width/height of a named texture (variable) SZEXP_VAR_H, SZEXP_OP2, // Pop two elements and push the result of a dyadic operation SZEXP_OP1, // Pop one element and push the result of a monadic operation }; struct szexp { enum szexp_tag tag; union { float cval; pl_str varname; enum szexp_op op; } val; }; struct custom_shader_hook { // Variable/literal names of textures pl_str pass_desc; pl_str hook_tex[SHADER_MAX_HOOKS]; pl_str bind_tex[SHADER_MAX_BINDS]; pl_str save_tex; // Shader body itself + metadata pl_str pass_body; float offset[2]; bool offset_align; int comps; // Special expressions governing the output size and execution conditions struct szexp width[MAX_SZEXP_SIZE]; struct szexp height[MAX_SZEXP_SIZE]; struct szexp cond[MAX_SZEXP_SIZE]; // Special metadata for compute shaders bool is_compute; int block_w, block_h; // Block size (each block corresponds to one WG) int threads_w, threads_h; // How many threads form a WG }; static bool parse_rpn_szexpr(pl_str line, struct szexp out[MAX_SZEXP_SIZE]) { int pos = 0; while (line.len > 0) { pl_str word = pl_str_split_char(line, ' ', &line); if (word.len == 0) continue; if (pos >= MAX_SZEXP_SIZE) return false; struct szexp *exp = &out[pos++]; if 
(pl_str_eatend0(&word, ".w") || pl_str_eatend0(&word, ".width")) { exp->tag = SZEXP_VAR_W; exp->val.varname = word; continue; } if (pl_str_eatend0(&word, ".h") || pl_str_eatend0(&word, ".height")) { exp->tag = SZEXP_VAR_H; exp->val.varname = word; continue; } switch (word.buf[0]) { case '+': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_ADD; continue; case '-': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_SUB; continue; case '*': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MUL; continue; case '/': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_DIV; continue; case '!': exp->tag = SZEXP_OP1; exp->val.op = SZEXP_OP_NOT; continue; case '>': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_GT; continue; case '<': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_LT; continue; } if (word.buf[0] >= '0' && word.buf[0] <= '9') { exp->tag = SZEXP_CONST; if (!pl_str_parse_float(word, &exp->val.cval)) return false; continue; } // Some sort of illegal expression return false; } return true; } // Evaluate a `szexp`, given a lookup function for named textures // Returns whether successful. 'result' is left untouched on failure static bool pl_eval_szexpr(pl_log log, void *priv, bool (*lookup)(void *priv, pl_str var, float size[2]), const struct szexp expr[MAX_SZEXP_SIZE], float *result) { float stack[MAX_SZEXP_SIZE] = {0}; int idx = 0; // points to next element to push for (int i = 0; i < MAX_SZEXP_SIZE; i++) { switch (expr[i].tag) { case SZEXP_END: goto done; case SZEXP_CONST: // Since our SZEXPs are bound by MAX_SZEXP_SIZE, it should be // impossible to overflow the stack assert(idx < MAX_SZEXP_SIZE); stack[idx++] = expr[i].val.cval; continue; case SZEXP_OP1: if (idx < 1) { pl_warn(log, "Stack underflow in RPN expression!"); return false; } switch (expr[i].val.op) { case SZEXP_OP_NOT: stack[idx-1] = !stack[idx-1]; break; default: pl_unreachable(); } continue; case SZEXP_OP2: if (idx < 2) { pl_warn(log, "Stack underflow in RPN expression!"); return false; } // Pop the operands in reverse order float op2 = stack[--idx]; float op1 = stack[--idx]; float res = 0.0; switch (expr[i].val.op) { case SZEXP_OP_ADD: res = op1 + op2; break; case SZEXP_OP_SUB: res = op1 - op2; break; case SZEXP_OP_MUL: res = op1 * op2; break; case SZEXP_OP_DIV: res = op1 / op2; break; case SZEXP_OP_GT: res = op1 > op2; break; case SZEXP_OP_LT: res = op1 < op2; break; case SZEXP_OP_NOT: pl_unreachable(); } if (!isfinite(res)) { pl_warn(log, "Illegal operation in RPN expression!"); return false; } stack[idx++] = res; continue; case SZEXP_VAR_W: case SZEXP_VAR_H: { pl_str name = expr[i].val.varname; float size[2]; if (!lookup(priv, name, size)) { pl_warn(log, "Variable '%.*s' not found in RPN expression!", PL_STR_FMT(name)); return false; } stack[idx++] = (expr[i].tag == SZEXP_VAR_W) ? 
size[0] : size[1]; continue; } } } done: // Return the single stack element if (idx != 1) { pl_warn(log, "Malformed stack after RPN expression!"); return false; } *result = stack[0]; return true; } static inline pl_str split_magic(pl_str *body) { pl_str ret = pl_str_split_str0(*body, "//!", body); if (body->len) { // Make sure the separator is included in the remainder body->buf -= 3; body->len += 3; } return ret; } static bool parse_hook(pl_log log, pl_str *body, struct custom_shader_hook *out) { *out = (struct custom_shader_hook){ .pass_desc = pl_str0("unknown user shader"), .width = {{ SZEXP_VAR_W, { .varname = pl_str0("HOOKED") }}}, .height = {{ SZEXP_VAR_H, { .varname = pl_str0("HOOKED") }}}, .cond = {{ SZEXP_CONST, { .cval = 1.0 }}}, }; int hook_idx = 0; int bind_idx = 0; // Parse all headers while (true) { pl_str rest; pl_str line = pl_str_strip(pl_str_getline(*body, &rest)); // Check for the presence of the magic line beginning if (!pl_str_eatstart0(&line, "//!")) break; *body = rest; // Parse the supported commands if (pl_str_eatstart0(&line, "HOOK")) { if (hook_idx == SHADER_MAX_HOOKS) { pl_err(log, "Passes may only hook up to %d textures!", SHADER_MAX_HOOKS); return false; } out->hook_tex[hook_idx++] = pl_str_strip(line); continue; } if (pl_str_eatstart0(&line, "BIND")) { if (bind_idx == SHADER_MAX_BINDS) { pl_err(log, "Passes may only bind up to %d textures!", SHADER_MAX_BINDS); return false; } out->bind_tex[bind_idx++] = pl_str_strip(line); continue; } if (pl_str_eatstart0(&line, "SAVE")) { pl_str save_tex = pl_str_strip(line); if (pl_str_equals0(save_tex, "HOOKED")) { // This is a special name that means "overwrite existing" // texture, which we just signal by not having any `save_tex` // name set. out->save_tex = (pl_str) {0}; } else { out->save_tex = save_tex; }; continue; } if (pl_str_eatstart0(&line, "DESC")) { out->pass_desc = pl_str_strip(line); continue; } if (pl_str_eatstart0(&line, "OFFSET")) { line = pl_str_strip(line); if (pl_str_equals0(line, "ALIGN")) { out->offset_align = true; } else { if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &out->offset[0]) || !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &out->offset[1]) || line.len) { pl_err(log, "Error while parsing OFFSET!"); return false; } } continue; } if (pl_str_eatstart0(&line, "WIDTH")) { if (!parse_rpn_szexpr(line, out->width)) { pl_err(log, "Error while parsing WIDTH!"); return false; } continue; } if (pl_str_eatstart0(&line, "HEIGHT")) { if (!parse_rpn_szexpr(line, out->height)) { pl_err(log, "Error while parsing HEIGHT!"); return false; } continue; } if (pl_str_eatstart0(&line, "WHEN")) { if (!parse_rpn_szexpr(line, out->cond)) { pl_err(log, "Error while parsing WHEN!"); return false; } continue; } if (pl_str_eatstart0(&line, "COMPONENTS")) { if (!pl_str_parse_int(pl_str_strip(line), &out->comps)) { pl_err(log, "Error parsing COMPONENTS: '%.*s'", PL_STR_FMT(line)); return false; } continue; } if (pl_str_eatstart0(&line, "COMPUTE")) { line = pl_str_strip(line); bool ok = pl_str_parse_int(pl_str_split_char(line, ' ', &line), &out->block_w) && pl_str_parse_int(pl_str_split_char(line, ' ', &line), &out->block_h); line = pl_str_strip(line); if (ok && line.len) { ok = pl_str_parse_int(pl_str_split_char(line, ' ', &line), &out->threads_w) && pl_str_parse_int(pl_str_split_char(line, ' ', &line), &out->threads_h) && !line.len; } else { out->threads_w = out->block_w; out->threads_h = out->block_h; } if (!ok) { pl_err(log, "Error while parsing COMPUTE!"); return false; } out->is_compute = 
true; continue; } // Unknown command type pl_err(log, "Unrecognized command '%.*s'!", PL_STR_FMT(line)); return false; } // The rest of the file up until the next magic line beginning (if any) // shall be the shader body out->pass_body = split_magic(body); // Sanity checking if (hook_idx == 0) pl_warn(log, "Pass has no hooked textures (will be ignored)!"); return true; } static bool parse_tex(pl_gpu gpu, void *alloc, pl_str *body, struct pl_shader_desc *out) { *out = (struct pl_shader_desc) { .desc = { .name = "USER_TEX", .type = PL_DESC_SAMPLED_TEX, }, }; struct pl_tex_params params = { .w = 1, .h = 1, .d = 0, .sampleable = true, .debug_tag = PL_DEBUG_TAG, }; while (true) { pl_str rest; pl_str line = pl_str_strip(pl_str_getline(*body, &rest)); if (!pl_str_eatstart0(&line, "//!")) break; *body = rest; if (pl_str_eatstart0(&line, "TEXTURE")) { out->desc.name = pl_strdup0(alloc, pl_str_strip(line)); continue; } if (pl_str_eatstart0(&line, "SIZE")) { line = pl_str_strip(line); int dims = 0; int dim[4]; // extra space to catch invalid extra entries while (line.len && dims < PL_ARRAY_SIZE(dim)) { if (!pl_str_parse_int(pl_str_split_char(line, ' ', &line), &dim[dims++])) { PL_ERR(gpu, "Error while parsing SIZE!"); return false; } } uint32_t lim = dims == 1 ? gpu->limits.max_tex_1d_dim : dims == 2 ? gpu->limits.max_tex_2d_dim : dims == 3 ? gpu->limits.max_tex_3d_dim : 0; // Sanity check against GPU size limits switch (dims) { case 3: params.d = dim[2]; if (params.d < 1 || params.d > lim) { PL_ERR(gpu, "SIZE %d exceeds GPU's texture size limits (%d)!", params.d, lim); return false; } // fall through case 2: params.h = dim[1]; if (params.h < 1 || params.h > lim) { PL_ERR(gpu, "SIZE %d exceeds GPU's texture size limits (%d)!", params.h, lim); return false; } // fall through case 1: params.w = dim[0]; if (params.w < 1 || params.w > lim) { PL_ERR(gpu, "SIZE %d exceeds GPU's texture size limits (%d)!", params.w, lim); return false; } break; default: PL_ERR(gpu, "Invalid number of texture dimensions!"); return false; }; // Clear out the superfluous components if (dims < 3) params.d = 0; if (dims < 2) params.h = 0; continue; } if (pl_str_eatstart0(&line, "FORMAT ")) { line = pl_str_strip(line); params.format = NULL; for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; if (pl_str_equals0(line, fmt->name)) { params.format = fmt; break; } } if (!params.format || params.format->opaque) { PL_ERR(gpu, "Unrecognized/unavailable FORMAT name: '%.*s'!", PL_STR_FMT(line)); return false; } if (!(params.format->caps & PL_FMT_CAP_SAMPLEABLE)) { PL_ERR(gpu, "Chosen FORMAT '%.*s' is not sampleable!", PL_STR_FMT(line)); return false; } continue; } if (pl_str_eatstart0(&line, "FILTER")) { line = pl_str_strip(line); if (pl_str_equals0(line, "LINEAR")) { out->binding.sample_mode = PL_TEX_SAMPLE_LINEAR; } else if (pl_str_equals0(line, "NEAREST")) { out->binding.sample_mode = PL_TEX_SAMPLE_NEAREST; } else { PL_ERR(gpu, "Unrecognized FILTER: '%.*s'!", PL_STR_FMT(line)); return false; } continue; } if (pl_str_eatstart0(&line, "BORDER")) { line = pl_str_strip(line); if (pl_str_equals0(line, "CLAMP")) { out->binding.address_mode = PL_TEX_ADDRESS_CLAMP; } else if (pl_str_equals0(line, "REPEAT")) { out->binding.address_mode = PL_TEX_ADDRESS_REPEAT; } else if (pl_str_equals0(line, "MIRROR")) { out->binding.address_mode = PL_TEX_ADDRESS_MIRROR; } else { PL_ERR(gpu, "Unrecognized BORDER: '%.*s'!", PL_STR_FMT(line)); return false; } continue; } if (pl_str_eatstart0(&line, "STORAGE")) { params.storable = true; 
out->desc.type = PL_DESC_STORAGE_IMG; out->desc.access = PL_DESC_ACCESS_READWRITE; out->memory = PL_MEMORY_COHERENT; continue; } PL_ERR(gpu, "Unrecognized command '%.*s'!", PL_STR_FMT(line)); return false; } if (!params.format) { PL_ERR(gpu, "No FORMAT specified!"); return false; } int caps = params.format->caps; if (out->binding.sample_mode == PL_TEX_SAMPLE_LINEAR && !(caps & PL_FMT_CAP_LINEAR)) { PL_ERR(gpu, "The specified texture format cannot be linear filtered!"); return false; } // Decode the rest of the section (up to the next //! marker) as raw hex // data for the texture pl_str tex, hexdata = split_magic(body); if (!pl_str_decode_hex(NULL, pl_str_strip(hexdata), &tex)) { PL_ERR(gpu, "Error while parsing TEXTURE body: must be a valid " "hexadecimal sequence!"); return false; } int texels = params.w * PL_DEF(params.h, 1) * PL_DEF(params.d, 1); size_t expected_len = texels * params.format->texel_size; if (tex.len == 0 && params.storable) { // In this case, it's okay that the texture has no initial data pl_free_ptr(&tex.buf); } else if (tex.len != expected_len) { PL_ERR(gpu, "Shader TEXTURE size mismatch: got %zu bytes, expected %zu!", tex.len, expected_len); pl_free(tex.buf); return false; } params.initial_data = tex.buf; out->binding.object = pl_tex_create(gpu, ¶ms); pl_free(tex.buf); if (!out->binding.object) { PL_ERR(gpu, "Failed creating custom texture!"); return false; } return true; } static bool parse_buf(pl_gpu gpu, void *alloc, pl_str *body, struct pl_shader_desc *out) { *out = (struct pl_shader_desc) { .desc = { .name = "USER_BUF", .type = PL_DESC_BUF_UNIFORM, }, }; // Temporary, to allow deferring variable placement until all headers // have been processed (in order to e.g. determine buffer type) void *tmp = pl_tmp(alloc); // will be freed automatically on failure PL_ARRAY(struct pl_var) vars = {0}; while (true) { pl_str rest; pl_str line = pl_str_strip(pl_str_getline(*body, &rest)); if (!pl_str_eatstart0(&line, "//!")) break; *body = rest; if (pl_str_eatstart0(&line, "BUFFER")) { out->desc.name = pl_strdup0(alloc, pl_str_strip(line)); continue; } if (pl_str_eatstart0(&line, "STORAGE")) { out->desc.type = PL_DESC_BUF_STORAGE; out->desc.access = PL_DESC_ACCESS_READWRITE; out->memory = PL_MEMORY_COHERENT; continue; } if (pl_str_eatstart0(&line, "VAR")) { pl_str type_name = pl_str_split_char(pl_str_strip(line), ' ', &line); struct pl_var var = {0}; for (const struct pl_named_var *nv = pl_var_glsl_types; nv->glsl_name; nv++) { if (pl_str_equals0(type_name, nv->glsl_name)) { var = nv->var; break; } } if (!var.type) { // No type found PL_ERR(gpu, "Unrecognized GLSL type '%.*s'!", PL_STR_FMT(type_name)); return false; } pl_str var_name = pl_str_split_char(line, '[', &line); if (line.len > 0) { // Parse array dimension if (!pl_str_parse_int(pl_str_split_char(line, ']', NULL), &var.dim_a)) { PL_ERR(gpu, "Failed parsing array dimension from [%.*s!", PL_STR_FMT(line)); return false; } if (var.dim_a < 1) { PL_ERR(gpu, "Invalid array dimension %d!", var.dim_a); return false; } } var.name = pl_strdup0(alloc, pl_str_strip(var_name)); PL_ARRAY_APPEND(tmp, vars, var); continue; } PL_ERR(gpu, "Unrecognized command '%.*s'!", PL_STR_FMT(line)); return false; } // Try placing all of the buffer variables for (int i = 0; i < vars.num; i++) { if (!sh_buf_desc_append(alloc, gpu, out, NULL, vars.elem[i])) { PL_ERR(gpu, "Custom buffer exceeds GPU limitations!"); return false; } } // Decode the rest of the section (up to the next //! 
marker) as raw hex // data for the buffer pl_str data, hexdata = split_magic(body); if (!pl_str_decode_hex(tmp, pl_str_strip(hexdata), &data)) { PL_ERR(gpu, "Error while parsing BUFFER body: must be a valid " "hexadecimal sequence!"); return false; } size_t buf_size = sh_buf_desc_size(out); if (data.len == 0 && out->desc.type == PL_DESC_BUF_STORAGE) { // In this case, it's okay that the buffer has no initial data } else if (data.len != buf_size) { PL_ERR(gpu, "Shader BUFFER size mismatch: got %zu bytes, expected %zu!", data.len, buf_size); return false; } out->binding.object = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .uniform = out->desc.type == PL_DESC_BUF_UNIFORM, .storable = out->desc.type == PL_DESC_BUF_STORAGE, .initial_data = data.len ? data.buf : NULL, )); if (!out->binding.object) { PL_ERR(gpu, "Failed creating custom buffer!"); return false; } pl_free(tmp); return true; } static enum pl_hook_stage mp_stage_to_pl(pl_str stage) { if (pl_str_equals0(stage, "RGB")) return PL_HOOK_RGB_INPUT; if (pl_str_equals0(stage, "LUMA")) return PL_HOOK_LUMA_INPUT; if (pl_str_equals0(stage, "CHROMA")) return PL_HOOK_CHROMA_INPUT; if (pl_str_equals0(stage, "ALPHA")) return PL_HOOK_ALPHA_INPUT; if (pl_str_equals0(stage, "XYZ")) return PL_HOOK_XYZ_INPUT; if (pl_str_equals0(stage, "CHROMA_SCALED")) return PL_HOOK_CHROMA_SCALED; if (pl_str_equals0(stage, "ALPHA_SCALED")) return PL_HOOK_ALPHA_SCALED; if (pl_str_equals0(stage, "NATIVE")) return PL_HOOK_NATIVE; if (pl_str_equals0(stage, "MAINPRESUB")) return PL_HOOK_RGB; if (pl_str_equals0(stage, "MAIN")) return PL_HOOK_RGB; // Note: conflicts with above! if (pl_str_equals0(stage, "LINEAR")) return PL_HOOK_LINEAR; if (pl_str_equals0(stage, "SIGMOID")) return PL_HOOK_SIGMOID; if (pl_str_equals0(stage, "PREKERNEL")) return PL_HOOK_PRE_KERNEL; if (pl_str_equals0(stage, "POSTKERNEL")) return PL_HOOK_POST_KERNEL; if (pl_str_equals0(stage, "SCALED")) return PL_HOOK_SCALED; if (pl_str_equals0(stage, "OUTPUT")) return PL_HOOK_OUTPUT; return 0; } static pl_str pl_stage_to_mp(enum pl_hook_stage stage) { switch (stage) { case PL_HOOK_RGB_INPUT: return pl_str0("RGB"); case PL_HOOK_LUMA_INPUT: return pl_str0("LUMA"); case PL_HOOK_CHROMA_INPUT: return pl_str0("CHROMA"); case PL_HOOK_ALPHA_INPUT: return pl_str0("ALPHA"); case PL_HOOK_XYZ_INPUT: return pl_str0("XYZ"); case PL_HOOK_CHROMA_SCALED: return pl_str0("CHROMA_SCALED"); case PL_HOOK_ALPHA_SCALED: return pl_str0("ALPHA_SCALED"); case PL_HOOK_NATIVE: return pl_str0("NATIVE"); case PL_HOOK_RGB: return pl_str0("MAINPRESUB"); case PL_HOOK_LINEAR: return pl_str0("LINEAR"); case PL_HOOK_SIGMOID: return pl_str0("SIGMOID"); case PL_HOOK_PRE_OVERLAY: return pl_str0("PREOVERLAY"); // Note: doesn't exist! 
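// (The PREOVERLAY name is only synthesized here so the switch stays exhaustive;
// mp_stage_to_pl has no matching keyword, so user shaders parsed by this file
// can never actually register a pass for that stage.)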
case PL_HOOK_PRE_KERNEL: return pl_str0("PREKERNEL"); case PL_HOOK_POST_KERNEL: return pl_str0("POSTKERNEL"); case PL_HOOK_SCALED: return pl_str0("SCALED"); case PL_HOOK_OUTPUT: return pl_str0("OUTPUT"); }; pl_unreachable(); } struct hook_pass { enum pl_hook_stage exec_stages; struct custom_shader_hook hook; }; struct pass_tex { pl_str name; pl_tex tex; // Metadata struct pl_rect2df rect; struct pl_color_repr repr; struct pl_color_space color; int comps; }; struct hook_priv { pl_log log; pl_gpu gpu; void *alloc; PL_ARRAY(struct hook_pass) hook_passes; // Fixed (for shader-local resources) PL_ARRAY(struct pl_shader_desc) descriptors; // Dynamic per pass enum pl_hook_stage save_stages; PL_ARRAY(struct pass_tex) pass_textures; // State for PRNG/frame count int frame_count; uint64_t prng_state[4]; }; static void hook_reset(void *priv) { struct hook_priv *p = priv; p->pass_textures.num = 0; } struct szexp_ctx { struct hook_priv *priv; const struct pl_hook_params *params; struct pass_tex hooked; }; static bool lookup_tex(void *priv, pl_str var, float size[2]) { struct szexp_ctx *ctx = priv; struct hook_priv *p = ctx->priv; const struct pl_hook_params *params = ctx->params; if (pl_str_equals0(var, "HOOKED")) { pl_assert(ctx->hooked.tex); size[0] = ctx->hooked.tex->params.w; size[1] = ctx->hooked.tex->params.h; return true; } if (pl_str_equals0(var, "NATIVE_CROPPED")) { size[0] = fabs(pl_rect_w(params->src_rect)); size[1] = fabs(pl_rect_h(params->src_rect)); return true; } if (pl_str_equals0(var, "OUTPUT")) { size[0] = abs(pl_rect_w(params->dst_rect)); size[1] = abs(pl_rect_h(params->dst_rect)); return true; } if (pl_str_equals0(var, "MAIN")) var = pl_str0("MAINPRESUB"); for (int i = 0; i < p->pass_textures.num; i++) { if (pl_str_equals(var, p->pass_textures.elem[i].name)) { pl_tex tex = p->pass_textures.elem[i].tex; size[0] = tex->params.w; size[1] = tex->params.h; return true; } } return false; } static double prng_step(uint64_t s[4]) { const uint64_t result = s[0] + s[3]; const uint64_t t = s[1] << 17; s[2] ^= s[0]; s[3] ^= s[1]; s[1] ^= s[2]; s[0] ^= s[3]; s[2] ^= t; s[3] = (s[3] << 45) | (s[3] >> (64 - 45)); return (result >> 11) * 0x1.0p-53; } static bool bind_pass_tex(pl_shader sh, pl_str name, const struct pass_tex *ptex, const struct pl_rect2df *rect, bool hooked, bool mainpresub) { ident_t id, pos, size, pt; // Compatibility with mpv texture binding semantics id = sh_bind(sh, ptex->tex, PL_TEX_ADDRESS_CLAMP, PL_TEX_SAMPLE_LINEAR, "hook_tex", rect, &pos, &size, &pt); if (!id) return false; GLSLH("#define %.*s_raw %s \n", PL_STR_FMT(name), id); GLSLH("#define %.*s_pos %s \n", PL_STR_FMT(name), pos); GLSLH("#define %.*s_map %s_map \n", PL_STR_FMT(name), pos); GLSLH("#define %.*s_size %s \n", PL_STR_FMT(name), size); GLSLH("#define %.*s_pt %s \n", PL_STR_FMT(name), pt); float off[2] = { ptex->rect.x0, ptex->rect.y0 }; GLSLH("#define %.*s_off %s \n", PL_STR_FMT(name), sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("offset"), .data = off, })); struct pl_color_repr repr = ptex->repr; ident_t scale = SH_FLOAT(pl_color_repr_normalize(&repr)); GLSLH("#define %.*s_mul %s \n", PL_STR_FMT(name), scale); // Compatibility with mpv GLSLH("#define %.*s_rot mat2(1.0, 0.0, 0.0, 1.0) \n", PL_STR_FMT(name)); // Sampling function boilerplate GLSLH("#define %.*s_tex(pos) (%s * vec4(%s(%s, pos))) \n", PL_STR_FMT(name), scale, sh_tex_fn(sh, ptex->tex->params), id); GLSLH("#define %.*s_texOff(off) (%.*s_tex(%s + %s * vec2(off))) \n", PL_STR_FMT(name), PL_STR_FMT(name), pos, pt); bool can_gather = 
ptex->tex->params.format->gatherable; if (can_gather) { GLSLH("#define %.*s_gather(pos, c) (%s * vec4(textureGather(%s, pos, c))) \n", PL_STR_FMT(name), scale, id); } if (hooked) { GLSLH("#define HOOKED_raw %.*s_raw \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_pos %.*s_pos \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_size %.*s_size \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_rot %.*s_rot \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_off %.*s_off \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_pt %.*s_pt \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_map %.*s_map \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_mul %.*s_mul \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_tex %.*s_tex \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_texOff %.*s_texOff \n", PL_STR_FMT(name)); if (can_gather) GLSLH("#define HOOKED_gather %.*s_gather \n", PL_STR_FMT(name)); } if (mainpresub) { GLSLH("#define MAIN_raw MAINPRESUB_raw \n"); GLSLH("#define MAIN_pos MAINPRESUB_pos \n"); GLSLH("#define MAIN_size MAINPRESUB_size \n"); GLSLH("#define MAIN_rot MAINPRESUB_rot \n"); GLSLH("#define MAIN_off MAINPRESUB_off \n"); GLSLH("#define MAIN_pt MAINPRESUB_pt \n"); GLSLH("#define MAIN_map MAINPRESUB_map \n"); GLSLH("#define MAIN_mul MAINPRESUB_mul \n"); GLSLH("#define MAIN_tex MAINPRESUB_tex \n"); GLSLH("#define MAIN_texOff MAINPRESUB_texOff \n"); if (can_gather) GLSLH("#define MAIN_gather MAINPRESUB_gather \n"); } return true; } static void save_pass_tex(struct hook_priv *p, struct pass_tex ptex) { for (int i = 0; i < p->pass_textures.num; i++) { if (!pl_str_equals(p->pass_textures.elem[i].name, ptex.name)) continue; p->pass_textures.elem[i] = ptex; return; } // No texture with this name yet, append new one PL_ARRAY_APPEND(p->alloc, p->pass_textures, ptex); } static struct pl_hook_res hook_hook(void *priv, const struct pl_hook_params *params) { struct hook_priv *p = priv; pl_str stage = pl_stage_to_mp(params->stage); struct pl_hook_res res = {0}; pl_shader sh = NULL; struct szexp_ctx scope = { .priv = p, .params = params, .hooked = { .name = stage, .tex = params->tex, .rect = params->rect, .repr = params->repr, .color = params->color, .comps = params->components, }, }; // Save the input texture if needed if (p->save_stages & params->stage) { PL_TRACE(p, "Saving input texture '%.*s' for binding", PL_STR_FMT(scope.hooked.name)); save_pass_tex(p, scope.hooked); } for (int n = 0; n < p->hook_passes.num; n++) { const struct hook_pass *pass = &p->hook_passes.elem[n]; if (!(pass->exec_stages & params->stage)) continue; const struct custom_shader_hook *hook = &pass->hook; PL_TRACE(p, "Executing hook pass %d on stage '%.*s': %.*s", n, PL_STR_FMT(stage), PL_STR_FMT(hook->pass_desc)); // Test for execution condition float run = 0; if (!pl_eval_szexpr(p->log, &scope, lookup_tex, hook->cond, &run)) goto error; if (!run) { PL_TRACE(p, "Skipping hook due to condition"); continue; } float out_size[2] = {0}; if (!pl_eval_szexpr(p->log, &scope, lookup_tex, hook->width, &out_size[0]) || !pl_eval_szexpr(p->log, &scope, lookup_tex, hook->height, &out_size[1])) { goto error; } int out_w = roundf(out_size[0]), out_h = roundf(out_size[1]); // Generate a new texture to store the render result pl_tex fbo; fbo = params->get_tex(params->priv, out_w, out_h); if (!fbo) { PL_ERR(p, "Failed dispatching hook: `get_tex` callback failed?"); goto error; } // Generate a new shader object sh = pl_dispatch_begin(params->dispatch); if (!sh_require(sh, PL_SHADER_SIG_NONE, out_w, out_h)) goto error; if (hook->is_compute) { if (!sh_try_compute(sh, hook->threads_w, 
hook->threads_h, false, 0) || !fbo->params.storable) { PL_ERR(p, "Failed dispatching COMPUTE shader"); goto error; } } else { // Default non-COMPUTE shaders to explicitly use fragment shaders // only, to avoid breaking things like fwidth() sh->type = PL_DEF(sh->type, SH_FRAGMENT); } // Bind all necessary input textures for (int i = 0; i < PL_ARRAY_SIZE(hook->bind_tex); i++) { pl_str texname = hook->bind_tex[i]; if (!texname.len) break; // Convenience alias, to allow writing shaders that are oblivious // of the exact stage they hooked. This simply translates to // whatever stage actually fired the hook. bool hooked = false, mainpresub = false; if (pl_str_equals0(texname, "HOOKED")) { // Continue with binding this, under the new name texname = stage; hooked = true; } // Compatibility alias, because MAIN and MAINPRESUB mean the same // thing to libplacebo, but user shaders are still written as // though they can be different concepts. if (pl_str_equals0(texname, "MAIN") || pl_str_equals0(texname, "MAINPRESUB")) { texname = pl_str0("MAINPRESUB"); mainpresub = true; } for (int j = 0; j < p->descriptors.num; j++) { if (pl_str_equals0(texname, p->descriptors.elem[j].desc.name)) { // Directly bind this, no need to bother with all the // `bind_pass_tex` boilerplate ident_t id = sh_desc(sh, p->descriptors.elem[j]); GLSLH("#define %.*s %s \n", PL_STR_FMT(texname), id); if (p->descriptors.elem[j].desc.type == PL_DESC_SAMPLED_TEX) { pl_tex tex = p->descriptors.elem[j].binding.object; GLSLH("#define %.*s_tex(pos) (%s(%s, pos)) \n", PL_STR_FMT(texname), sh_tex_fn(sh, tex->params), id); } goto next_bind; } } for (int j = 0; j < p->pass_textures.num; j++) { if (pl_str_equals(texname, p->pass_textures.elem[j].name)) { // Note: We bind the whole texture, rather than // params->rect, because user shaders in general are not // designed to handle cropped input textures. 
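// The rect constructed below therefore spans the full texture; for
// OFFSET ALIGN on the hooked plane it is additionally shifted so the plane
// lines up with the reference crop, using the scale factor between
// params->rect and params->src_rect.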
const struct pass_tex *ptex = &p->pass_textures.elem[j]; struct pl_rect2df rect = { 0, 0, ptex->tex->params.w, ptex->tex->params.h, }; if (hook->offset_align && pl_str_equals(texname, stage)) { float sx = pl_rect_w(params->rect) / pl_rect_w(params->src_rect), sy = pl_rect_h(params->rect) / pl_rect_h(params->src_rect), ox = params->rect.x0 - sx * params->src_rect.x0, oy = params->rect.y0 - sy * params->src_rect.y0; PL_TRACE(p, "Aligning plane with ref: %f %f", ox, oy); pl_rect2df_offset(&rect, ox, oy); } if (!bind_pass_tex(sh, texname, &p->pass_textures.elem[j], &rect, hooked, mainpresub)) { goto error; } goto next_bind; } } // If none of the above matched, this is a bogus/unknown texture name PL_ERR(p, "Tried binding unknown texture '%.*s'!", PL_STR_FMT(texname)); goto error; next_bind: ; // outer 'continue' } // Set up the input variables p->frame_count++; GLSLH("#define frame %s \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_int("frame"), .data = &p->frame_count, .dynamic = true, })); float random = prng_step(p->prng_state); GLSLH("#define random %s \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("random"), .data = &random, .dynamic = true, })); float src_size[2] = { pl_rect_w(params->src_rect), pl_rect_h(params->src_rect) }; GLSLH("#define input_size %s \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("input_size"), .data = src_size, })); float dst_size[2] = { pl_rect_w(params->dst_rect), pl_rect_h(params->dst_rect) }; GLSLH("#define target_size %s \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("target_size"), .data = dst_size, })); float tex_off[2] = { params->src_rect.x0, params->src_rect.y0 }; GLSLH("#define tex_offset %s \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_offset"), .data = tex_off, })); // Load and run the user shader itself sh_append_str(sh, SH_BUF_HEADER, hook->pass_body); sh_describe(sh, pl_strdup0(SH_TMP(sh), hook->pass_desc)); bool ok; if (hook->is_compute) { GLSLP("#define out_image %s \n", sh_desc(sh, (struct pl_shader_desc) { .binding.object = fbo, .desc = { .name = "out_image", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_WRITEONLY, }, })); sh->res.output = PL_SHADER_SIG_NONE; GLSL("hook(); \n"); ok = pl_dispatch_compute(params->dispatch, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = { // Round up as many blocks as are needed to cover the image (out_w + hook->block_w - 1) / hook->block_w, (out_h + hook->block_h - 1) / hook->block_h, 1, }, .width = out_w, .height = out_h, )); } else { GLSL("vec4 color = hook(); \n"); ok = pl_dispatch_finish(params->dispatch, pl_dispatch_params( .shader = &sh, .target = fbo, )); } if (!ok) goto error; float sx = (float) out_w / params->tex->params.w, sy = (float) out_h / params->tex->params.h, x0 = sx * params->rect.x0 + hook->offset[0], y0 = sy * params->rect.y0 + hook->offset[1]; struct pl_rect2df new_rect = { x0, y0, x0 + sx * pl_rect_w(params->rect), y0 + sy * pl_rect_h(params->rect), }; if (hook->offset_align) { float rx = pl_rect_w(new_rect) / pl_rect_w(params->src_rect), ry = pl_rect_h(new_rect) / pl_rect_h(params->src_rect), ox = rx * params->src_rect.x0 - sx * params->rect.x0, oy = ry * params->src_rect.y0 - sy * params->rect.y0; pl_rect2df_offset(&new_rect, ox, oy); } // Save the result of this shader invocation struct pass_tex ptex = { .name = hook->save_tex.len ? 
hook->save_tex : stage, .tex = fbo, .repr = params->repr, .color = params->color, .comps = PL_DEF(hook->comps, params->components), .rect = new_rect, }; // It's assumed that users will correctly normalize the input pl_color_repr_normalize(&ptex.repr); PL_TRACE(p, "Saving output texture '%.*s' from hook execution on '%.*s'", PL_STR_FMT(ptex.name), PL_STR_FMT(stage)); save_pass_tex(p, ptex); // Update the result object, unless we saved to a different name if (pl_str_equals(ptex.name, stage)) { scope.hooked = ptex; res = (struct pl_hook_res) { .output = PL_HOOK_SIG_TEX, .tex = fbo, .repr = ptex.repr, .color = ptex.color, .components = ptex.comps, .rect = new_rect, }; } } return res; error: return (struct pl_hook_res) { .failed = true }; } const struct pl_hook *pl_mpv_user_shader_parse(pl_gpu gpu, const char *shader_text, size_t shader_len) { if (!shader_len) return NULL; struct pl_hook *hook = pl_alloc_obj(NULL, hook, struct hook_priv); struct hook_priv *p = PL_PRIV(hook); *hook = (struct pl_hook) { .input = PL_HOOK_SIG_TEX, .priv = p, .reset = hook_reset, .hook = hook_hook, }; *p = (struct hook_priv) { .log = gpu->log, .gpu = gpu, .alloc = hook, .prng_state = { // Determined by fair die roll 0xb76d71f9443c228allu, 0x93a02092fc4807e8llu, 0x06d81748f838bd07llu, 0x9381ee129dddce6cllu, }, }; pl_str shader = { (uint8_t *) shader_text, shader_len }; shader = pl_strdup(hook, shader); // Skip all garbage (e.g. comments) before the first header int pos = pl_str_find(shader, pl_str0("//!")); if (pos < 0) { PL_ERR(gpu, "Shader appears to contain no headers?"); goto error; } shader = pl_str_drop(shader, pos); // Loop over the file while (shader.len > 0) { // Peek at the first header to dispatch the right type if (pl_str_startswith0(shader, "//!TEXTURE")) { struct pl_shader_desc sd; if (!parse_tex(gpu, hook, &shader, &sd)) goto error; PL_INFO(gpu, "Registering named texture '%s'", sd.desc.name); PL_ARRAY_APPEND(hook, p->descriptors, sd); continue; } if (pl_str_startswith0(shader, "//!BUFFER")) { struct pl_shader_desc sd; if (!parse_buf(gpu, hook, &shader, &sd)) goto error; PL_INFO(gpu, "Registering named buffer '%s'", sd.desc.name); PL_ARRAY_APPEND(hook, p->descriptors, sd); continue; } struct custom_shader_hook h; if (!parse_hook(gpu->log, &shader, &h)) goto error; struct hook_pass pass = { .exec_stages = 0, .hook = h, }; for (int i = 0; i < PL_ARRAY_SIZE(h.hook_tex); i++) pass.exec_stages |= mp_stage_to_pl(h.hook_tex[i]); for (int i = 0; i < PL_ARRAY_SIZE(h.bind_tex); i++) { p->save_stages |= mp_stage_to_pl(h.bind_tex[i]); if (pl_str_equals0(h.bind_tex[i], "HOOKED")) p->save_stages |= pass.exec_stages; } // As an extra precaution, this avoids errors when trying to run // conditions against planes that were never hooked. As a sole // exception, OUTPUT is special because it's hard-coded to return the // dst_rect even before it was hooked. 
(This is an apparently // undocumented mpv quirk, but shaders rely on it in practice) enum pl_hook_stage rpn_stages = 0; for (int i = 0; i < PL_ARRAY_SIZE(h.width); i++) { if (h.width[i].tag == SZEXP_VAR_W || h.width[i].tag == SZEXP_VAR_H) rpn_stages |= mp_stage_to_pl(h.width[i].val.varname); } for (int i = 0; i < PL_ARRAY_SIZE(h.height); i++) { if (h.height[i].tag == SZEXP_VAR_W || h.height[i].tag == SZEXP_VAR_H) rpn_stages |= mp_stage_to_pl(h.height[i].val.varname); } for (int i = 0; i < PL_ARRAY_SIZE(h.cond); i++) { if (h.cond[i].tag == SZEXP_VAR_W || h.cond[i].tag == SZEXP_VAR_H) rpn_stages |= mp_stage_to_pl(h.cond[i].val.varname); } p->save_stages |= rpn_stages & ~PL_HOOK_OUTPUT; PL_INFO(gpu, "Registering hook pass: %.*s", PL_STR_FMT(h.pass_desc)); PL_ARRAY_APPEND(hook, p->hook_passes, pass); } // We need to hook on both the exec and save stages, so that we can keep // track of any textures we might need hook->stages |= p->save_stages; for (int i = 0; i < p->hook_passes.num; i++) hook->stages |= p->hook_passes.elem[i].exec_stages; return hook; error: pl_free(hook); return NULL; } void pl_mpv_user_shader_destroy(const struct pl_hook **hookp) { const struct pl_hook *hook = *hookp; if (!hook) return; struct hook_priv *p = PL_PRIV(hook); for (int i = 0; i < p->descriptors.num; i++) { switch (p->descriptors.elem[i].desc.type) { case PL_DESC_BUF_UNIFORM: case PL_DESC_BUF_STORAGE: case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = p->descriptors.elem[i].binding.object; pl_buf_destroy(p->gpu, &buf); break; } case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: { pl_tex tex = p->descriptors.elem[i].binding.object; pl_tex_destroy(p->gpu, &tex); break; case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } } } pl_free((void *) hook); *hookp = NULL; } libplacebo-v4.192.1/src/shaders/film_grain.c000066400000000000000000000042001417677245700206650ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "film_grain.h" #include "shaders.h" bool pl_needs_film_grain(const struct pl_film_grain_params *params) { switch (params->data.type) { case PL_FILM_GRAIN_NONE: return false; case PL_FILM_GRAIN_AV1: return pl_needs_fg_av1(params); case PL_FILM_GRAIN_H274: return pl_needs_fg_h274(params); default: pl_unreachable(); } } struct sh_grain_obj { pl_shader_obj av1; pl_shader_obj h274; }; static void sh_grain_uninit(pl_gpu gpu, void *ptr) { struct sh_grain_obj *obj = ptr; pl_shader_obj_destroy(&obj->av1); pl_shader_obj_destroy(&obj->h274); } bool pl_shader_film_grain(pl_shader sh, pl_shader_obj *grain_state, const struct pl_film_grain_params *params) { if (!pl_needs_film_grain(params)) { // FIXME: Instead of erroring, sample directly SH_FAIL(sh, "pl_shader_film_grain called but no film grain needs to be " "applied, test with `pl_needs_film_grain` first!"); return false; } struct sh_grain_obj *obj; obj = SH_OBJ(sh, grain_state, PL_SHADER_OBJ_FILM_GRAIN, struct sh_grain_obj, sh_grain_uninit); if (!obj) return false; switch (params->data.type) { case PL_FILM_GRAIN_NONE: return false; case PL_FILM_GRAIN_AV1: return pl_shader_fg_av1(sh, &obj->av1, params); case PL_FILM_GRAIN_H274: return pl_shader_fg_h274(sh, &obj->h274, params); default: pl_unreachable(); } } libplacebo-v4.192.1/src/shaders/film_grain.h000066400000000000000000000043461417677245700207050ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include "common.h" bool pl_needs_fg_av1(const struct pl_film_grain_params *); bool pl_needs_fg_h274(const struct pl_film_grain_params *); bool pl_shader_fg_av1(pl_shader, pl_shader_obj *, const struct pl_film_grain_params *); bool pl_shader_fg_h274(pl_shader, pl_shader_obj *, const struct pl_film_grain_params *); // Common helper function static inline enum pl_channel channel_map(int i, const struct pl_film_grain_params *params) { static const enum pl_channel map_rgb[3] = { [PL_CHANNEL_G] = PL_CHANNEL_Y, [PL_CHANNEL_B] = PL_CHANNEL_CB, [PL_CHANNEL_R] = PL_CHANNEL_CR, }; static const enum pl_channel map_xyz[3] = { [1] = PL_CHANNEL_Y, // Y [2] = PL_CHANNEL_CB, // Z [0] = PL_CHANNEL_CR, // X }; if (i >= params->components) return PL_CHANNEL_NONE; int comp = params->component_mapping[i]; if (comp < 0 || comp > 2) return PL_CHANNEL_NONE; switch (params->repr->sys) { case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: return map_rgb[comp]; case PL_COLOR_SYSTEM_XYZ: return map_xyz[comp]; case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: case PL_COLOR_SYSTEM_DOLBYVISION: case PL_COLOR_SYSTEM_YCGCO: return comp; case PL_COLOR_SYSTEM_COUNT: break; } pl_unreachable(); } libplacebo-v4.192.1/src/shaders/film_grain_av1.c000066400000000000000000001253551417677245700214530ustar00rootroot00000000000000/* * This file is part of libplacebo, which is normally licensed under the terms * of the LGPL v2.1+. However, this file (film_grain_av1.c) is also available * under the terms of the more permissive MIT license: * * Copyright (c) 2018-2019 Niklas Haas * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "film_grain.h" #include "shaders.h" // Taken from the spec. 
Range is [-2048, 2047], mean is 0 and stddev is 512 static const int16_t gaussian_sequence[2048] = { 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820, 224, 1248, 996, 272, -8, -916, -388, -732, -104, -188, 800, 112, -652, -320, -376, 140, -252, 492, -168, 44, -788, 588, -584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368, 432, -196, -720, -192, 1000, -332, 652, -136, -552, -604, -4, 192, -220, -136, 1000, -52, 372, -96, -624, 124, -24, 396, 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740, 248, -968, -848, 608, 376, -60, -292, -40, -156, 252, -292, 248, 224, -280, 400, -244, 244, -60, 76, -80, 212, 532, 340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704, 220, -204, 640, -160, 1220, -408, 900, 336, 20, -336, -96, -792, 304, 48, -28, -1232, -1172, -448, 104, -292, -520, 244, 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136, 488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676, -376, 168, -108, 464, 8, 564, 64, 240, 308, -300, -400, -456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844, -164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96, -1244, -288, 276, 848, 832, -360, 656, 464, -384, -332, -356, 728, -388, 160, -192, 468, 296, 224, 140, -776, -100, 280, 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808, 772, 20, 268, 88, -332, -284, 124, -384, -448, 208, -228, -1044, -328, 660, 380, -148, -300, 588, 240, 540, 28, 136, -88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264, -528, -1108, 632, -484, -592, -344, 796, 124, -668, -768, 388, 1296, -232, -188, -200, -288, -4, 308, 100, -168, 256, -500, 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384, 548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220, -100, 996, -796, 548, 360, -216, 180, 428, -200, -212, 148, 96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572, -332, -8, -180, -176, 696, 116, -88, 628, 76, 44, -516, 240, -208, -40, 100, -592, 344, -308, -452, -228, 20, 916, -1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492, 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560, -1020, 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108, -28, 320, -628, 312, -92, -92, -472, 268, 16, 560, 516, -672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88, -152, 1012, 1064, -228, 164, -376, -684, 592, -392, 156, 196, -524, -64, -884, 160, -176, 636, 648, 404, -396, -436, 864, 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920, 436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564, -68, -164, -96, 692, 364, -692, -1012, -68, 260, -480, 876, -1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244, 496, 372, -32, 280, 200, 112, -440, -96, 24, -644, -184, 56, -432, 224, -980, 272, -260, 144, -436, 420, 356, 364, -528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72, 540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24, 424, 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4, -88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120, 756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108, -260, 328, -268, 224, -200, -416, 184, -604, -564, -20, 296, 60, 892, -888, 60, 164, 68, -760, 216, -296, 904, -336, -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164, -1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264, 308, 32, 64, -72, 72, 116, 176, -64, -272, 460, -536, -784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296, -1196, -288, -560, 1040, -472, 116, -848, -1116, 116, 636, 696, 284, -176, 1016, 204, -864, -648, -248, 356, 972, -584, -204, 264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212, -212, 52, 12, 
200, 268, -488, -404, -880, 824, -672, -40, 908, -248, 500, 716, -576, 492, -576, 16, 720, -108, 384, 124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8, 1268, 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704, -224, 596, -132, 268, 32, -452, 884, 104, -1008, 424, -1348, -280, 4, -1168, 368, 476, 696, 300, -8, 24, 180, -592, -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420, 320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220, -644, -248, 464, 72, 360, 32, -388, 496, -680, -48, 208, -116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544, -388, -264, 908, -800, -628, -612, -568, 572, -220, 164, 288, -16, -308, 308, -112, -636, -760, 280, -668, 432, 364, 240, -196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132, 636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16, -220, -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044, -32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732, 392, 356, 212, -80, -424, -1008, -324, 588, -1496, 576, 460, -816, -848, 56, -580, -92, -1372, -112, -496, 200, 364, 52, -140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104, -284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460, 756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716, -604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960, 472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476, 844, -748, -364, -44, 1116, -1104, -1056, 76, 428, 552, -692, 60, 356, 96, -384, -188, -612, -576, 736, 508, 892, 352, -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144, -8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44, 472, -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356, 652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452, -436, 860, -736, 212, 124, 504, -476, 468, 76, -472, 552, -692, -944, -620, 740, -240, 400, 132, 20, 192, -196, 264, -668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448, -832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588, 220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464, 412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216, 320, -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132, 372, -52, -256, 84, 116, -352, 48, 116, 304, -384, 412, 924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48, 332, 748, 524, -268, -720, 540, -276, 564, -344, -208, -196, 436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48, -456, 888, 8, 552, -156, -292, 948, 288, 128, -716, -292, 1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32, -44, 1284, 496, 192, 464, 312, -76, -516, -380, -456, -1012, -48, 308, -156, 36, 492, -156, -808, 188, 1652, 68, -120, -116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56, 528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416, -600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404, -316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92, 1688, -300, 180, 1020, -176, 820, -68, -228, -260, 436, -904, 20, 40, -508, 440, -736, 312, 332, 204, 760, -372, 728, 96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584, 192, 396, -728, -520, 276, -188, 80, -52, -612, -252, -48, 648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180, 816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528, 648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364, -376, -392, 556, -256, -576, 260, -352, 120, -16, -136, -260, -492, 72, 556, 660, 580, 616, 772, 436, 424, -32, -324, -1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64, 384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120, -480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168, -68, -196, -868, 460, 1080, 
380, -80, 244, 0, 484, -888, 64, 184, 352, 600, 460, 164, 604, -196, 320, -64, 588, -184, 228, 12, 372, 48, -848, -344, 224, 208, -200, 484, 128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580, 112, -120, 644, -356, -208, -608, -528, 704, 560, -424, 392, 828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80, -556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688, 0, 160, 356, 372, -776, 740, -128, 676, -248, -480, 4, -364, 96, 544, 232, -1032, 956, 236, 356, 20, -40, 300, 24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444, 508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192, 716, 120, 920, 688, 168, 44, -460, 568, 284, 1144, 1160, 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188, -816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404, -696, -72, -268, -892, 128, 184, -344, -780, 360, 336, 400, 344, 428, 548, -112, 136, -228, -216, -820, -516, 340, 92, -136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824, 164, -548, -180, -128, 116, -924, -828, 268, -368, -580, 620, 192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720, 288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620, -684, -24, -376, -384, -108, -920, -1032, 768, 180, -264, -508, -1268, -260, -60, 300, -240, 988, 724, -376, -576, -212, -736, 556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836, 268, 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180, 884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140, -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32, -228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916, 244, 12, -736, -296, 360, 468, -376, -108, -92, 788, 368, -56, 544, 400, -672, -420, 728, 16, 320, 44, -284, -380, -796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572, -624, -116, -692, -200, -56, 276, -88, 484, -324, 948, 864, 1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908, -84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84, 344, -520, 348, -688, 240, -84, 216, -1044, -136, -676, -396, -1500, 960, -40, 176, 168, 1516, 420, -504, -344, -364, -360, 1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928, -120, 1112, 476, -260, 560, -148, -344, 108, -196, 228, -288, 504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196, 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504, 116, 432, 528, 48, 476, -168, -608, 448, 160, -532, -272, 28, -676, -12, 828, 980, 456, 520, 104, -104, 256, -344, -4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208, -512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156, -212, 488, -192, -804, -256, 368, -360, -916, -328, 228, -240, -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432, 252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244, 312, -716, 592, -80, 436, 360, 4, -248, 160, 516, 584, 732, 44, -468, -280, -292, -156, -588, 28, 308, 912, 24, 124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300, -212, -1144, 32, -724, 800, -1128, -212, -1288, -848, 180, -416, 440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380, -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384, 648, -832, 508, 552, -52, -100, -656, 208, -568, 748, -88, 680, 232, 300, 192, -408, -1012, -152, -252, -268, 272, -876, -664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320, -672, -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88, -496, -556, -672, -368, 428, 92, 356, 404, -408, 252, 196, -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120, 372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664, -232, 420, 4, -344, -464, 556, 244, -416, -32, 252, 0, -412, 
188, -696, 508, -476, 324, -1096, 656, -312, 560, 264, -136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288, -276, -196, -500, 852, -544, -236, -1128, -992, -776, 116, 56, 52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148, 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156, -300, -528, -472, 364, 100, -744, -1056, -32, 540, 280, 144, -676, -32, -232, -280, -224, 96, 568, -76, 172, 148, 148, 104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944, 428, -484 }; static inline int get_random_number(int bits, uint16_t *state) { int r = *state; uint16_t bit = ((r >> 0) ^ (r >> 1) ^ (r >> 3) ^ (r >> 12)) & 1; *state = (r >> 1) | (bit << 15); return (*state >> (16 - bits)) & ((1 << bits) - 1); } static inline int round2(int x, int shift) { if (!shift) return x; return (x + (1 << (shift - 1))) >> shift; } enum { BLOCK_SIZE = 32, SCALING_LUT_SIZE = 256, GRAIN_WIDTH = 82, GRAIN_HEIGHT = 73, // On the GPU we only need a subsection of this GRAIN_WIDTH_LUT = 64, GRAIN_HEIGHT_LUT = 64, GRAIN_PAD_LUT = 9, // For subsampled grain textures SUB_GRAIN_WIDTH = 44, SUB_GRAIN_HEIGHT = 38, SUB_GRAIN_WIDTH_LUT = GRAIN_WIDTH_LUT >> 1, SUB_GRAIN_HEIGHT_LUT = GRAIN_HEIGHT_LUT >> 1, SUB_GRAIN_PAD_LUT = 6, }; // Contains the shift by which the offsets are indexed enum offset { OFFSET_TL = 24, OFFSET_T = 16, OFFSET_L = 8, OFFSET_N = 0, }; // Helper function to compute some common constants struct grain_scale { int grain_center; int grain_min; int grain_max; float texture_scale; float grain_scale; }; static inline int bit_depth(const struct pl_color_repr *repr) { int depth = PL_DEF(repr->bits.color_depth, PL_DEF(repr->bits.sample_depth, 8)); pl_assert(depth >= 8); return depth; } static struct grain_scale get_grain_scale(const struct pl_film_grain_params *params) { int bits = bit_depth(params->repr); struct grain_scale ret = { .grain_center = 128 << (bits - 8), }; ret.grain_min = -ret.grain_center; ret.grain_max = (256 << (bits - 8)) - 1 - ret.grain_center; struct pl_color_repr repr = *params->repr; ret.texture_scale = pl_color_repr_normalize(&repr); // Since our color samples are normalized to the range [0, 1], we need to // scale down grain values from the scale [0, 2^b - 1] to this range. ret.grain_scale = 1.0 / ((1 << bits) - 1); return ret; } // Generates the basic grain table (LumaGrain in the spec). 
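// The procedure follows the AV1 spec: the 82x73 buffer is first filled with
// white noise drawn from gaussian_sequence (scaled down by
// 12 - bit_depth + grain_scale_shift), then an auto-regressive filter of lag
// ar_coeff_lag is applied using ar_coeffs_y (clamping each sample to
// [grain_min, grain_max]), and finally the centre 64x64 window is scaled by
// grain_scale and written to `out` for upload as a float LUT.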
static void generate_grain_y(float out[GRAIN_HEIGHT_LUT][GRAIN_WIDTH_LUT], int16_t buf[GRAIN_HEIGHT][GRAIN_WIDTH], const struct pl_film_grain_params *params) { const struct pl_av1_grain_data *data = ¶ms->data.params.av1; struct grain_scale scale = get_grain_scale(params); uint16_t seed = (uint16_t) params->data.seed; int bits = bit_depth(params->repr); int shift = 12 - bits + data->grain_scale_shift; pl_assert(shift >= 0); for (int y = 0; y < GRAIN_HEIGHT; y++) { for (int x = 0; x < GRAIN_WIDTH; x++) { int16_t value = gaussian_sequence[ get_random_number(11, &seed) ]; buf[y][x] = round2(value, shift); } } const int ar_pad = 3; int ar_lag = data->ar_coeff_lag; for (int y = ar_pad; y < GRAIN_HEIGHT; y++) { for (int x = ar_pad; x < GRAIN_WIDTH - ar_pad; x++) { const int8_t *coeff = data->ar_coeffs_y; int sum = 0; for (int dy = -ar_lag; dy <= 0; dy++) { for (int dx = -ar_lag; dx <= ar_lag; dx++) { if (!dx && !dy) break; sum += *(coeff++) * buf[y + dy][x + dx]; } } int16_t grain = buf[y][x] + round2(sum, data->ar_coeff_shift); grain = PL_CLAMP(grain, scale.grain_min, scale.grain_max); buf[y][x] = grain; } } for (int y = 0; y < GRAIN_HEIGHT_LUT; y++) { for (int x = 0; x < GRAIN_WIDTH_LUT; x++) { int16_t grain = buf[y + GRAIN_PAD_LUT][x + GRAIN_PAD_LUT]; out[y][x] = grain * scale.grain_scale; } } } static void generate_grain_uv(float *out, int16_t buf[GRAIN_HEIGHT][GRAIN_WIDTH], const int16_t buf_y[GRAIN_HEIGHT][GRAIN_WIDTH], enum pl_channel channel, int sub_x, int sub_y, const struct pl_film_grain_params *params) { const struct pl_av1_grain_data *data = ¶ms->data.params.av1; struct grain_scale scale = get_grain_scale(params); int bits = bit_depth(params->repr); int shift = 12 - bits + data->grain_scale_shift; pl_assert(shift >= 0); uint16_t seed = params->data.seed; if (channel == PL_CHANNEL_CB) { seed ^= 0xb524; } else if (channel == PL_CHANNEL_CR) { seed ^= 0x49d8; } int chromaW = sub_x ? SUB_GRAIN_WIDTH : GRAIN_WIDTH; int chromaH = sub_y ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT; const int8_t *coeffs[] = { [PL_CHANNEL_CB] = data->ar_coeffs_uv[0], [PL_CHANNEL_CR] = data->ar_coeffs_uv[1], }; for (int y = 0; y < chromaH; y++) { for (int x = 0; x < chromaW; x++) { int16_t value = gaussian_sequence[ get_random_number(11, &seed) ]; buf[y][x] = round2(value, shift); } } const int ar_pad = 3; int ar_lag = data->ar_coeff_lag; for (int y = ar_pad; y < chromaH; y++) { for (int x = ar_pad; x < chromaW - ar_pad; x++) { const int8_t *coeff = coeffs[channel]; pl_assert(coeff); int sum = 0; for (int dy = -ar_lag; dy <= 0; dy++) { for (int dx = -ar_lag; dx <= ar_lag; dx++) { // For the final (current) pixel, we need to add in the // contribution from the luma grain texture if (!dx && !dy) { if (!data->num_points_y) break; int luma = 0; int lumaX = ((x - ar_pad) << sub_x) + ar_pad; int lumaY = ((y - ar_pad) << sub_y) + ar_pad; for (int i = 0; i <= sub_y; i++) { for (int j = 0; j <= sub_x; j++) { luma += buf_y[lumaY + i][lumaX + j]; } } luma = round2(luma, sub_x + sub_y); sum += luma * (*coeff); break; } sum += *(coeff++) * buf[y + dy][x + dx]; } } int16_t grain = buf[y][x] + round2(sum, data->ar_coeff_shift); grain = PL_CLAMP(grain, scale.grain_min, scale.grain_max); buf[y][x] = grain; } } int lutW = GRAIN_WIDTH_LUT >> sub_x; int lutH = GRAIN_HEIGHT_LUT >> sub_y; int padX = sub_x ? SUB_GRAIN_PAD_LUT : GRAIN_PAD_LUT; int padY = sub_y ? 
SUB_GRAIN_PAD_LUT : GRAIN_PAD_LUT; for (int y = 0; y < lutH; y++) { for (int x = 0; x < lutW; x++) { int16_t grain = buf[y + padY][x + padX]; out[y * lutW + x] = grain * scale.grain_scale; } } } static void generate_offsets(void *pbuf, const struct sh_lut_params *params) { const struct pl_film_grain_data *data = params->priv; unsigned int *buf = pbuf; pl_static_assert(sizeof(unsigned int) >= sizeof(uint32_t)); for (int y = 0; y < params->height; y++) { uint16_t state = data->seed; state ^= ((y * 37 + 178) & 0xFF) << 8; state ^= ((y * 173 + 105) & 0xFF); for (int x = 0; x < params->width; x++) { unsigned int *offsets = &buf[y * params->width + x]; uint8_t val = get_random_number(8, &state); uint8_t val_l = x ? (offsets - 1)[0] : 0; uint8_t val_t = y ? (offsets - params->width)[0] : 0; uint8_t val_tl = x && y ? (offsets - params->width - 1)[0] : 0; // Encode four offsets into a single 32-bit integer for the // convenience of the GPU. That way only one LUT fetch is // required for the entire block. *offsets = ((uint32_t) val_tl << OFFSET_TL) | ((uint32_t) val_t << OFFSET_T) | ((uint32_t) val_l << OFFSET_L) | ((uint32_t) val << OFFSET_N); } } } static void generate_scaling(void *pdata, const struct sh_lut_params *params) { assert(params->width == SCALING_LUT_SIZE && params->comps == 1); float *data = pdata; struct { int num; uint8_t (*points)[2]; const struct pl_av1_grain_data *data; } *ctx = params->priv; float range = 1 << ctx->data->scaling_shift; // Fill up the preceding entries with the initial value for (int i = 0; i < ctx->points[0][0]; i++) data[i] = ctx->points[0][1] / range; // Linearly interpolate the values in the middle for (int i = 0; i < ctx->num - 1; i++) { int bx = ctx->points[i][0]; int by = ctx->points[i][1]; int dx = ctx->points[i + 1][0] - bx; int dy = ctx->points[i + 1][1] - by; int delta = dy * ((0x10000 + (dx >> 1)) / dx); for (int x = 0; x < dx; x++) { int v = by + ((x * delta + 0x8000) >> 16); data[bx + x] = v / range; } } // Fill up the remaining entries with the final value for (int i = ctx->points[ctx->num - 1][0]; i < SCALING_LUT_SIZE; i++) data[i] = ctx->points[ctx->num - 1][1] / range; } static void sample(pl_shader sh, enum offset off, ident_t lut, int idx, int sub_x, int sub_y) { int dx = (off & OFFSET_L) ? 1 : 0, dy = (off & OFFSET_T) ? 1 : 0; static const char *index_strs[] = { [0] = ".x", [1] = ".y", }; GLSL("offset = uvec2(%du, %du) * uvec2((data >> %d) & 0xFu, \n" " (data >> %d) & 0xFu);\n" "pos = offset + local_id.xy + uvec2(%d, %d); \n" "val = %s(pos)%s; \n", sub_x ? 1 : 2, sub_y ? 1 : 2, off + 4, off, (BLOCK_SIZE >> sub_x) * dx, (BLOCK_SIZE >> sub_y) * dy, lut, idx >= 0 ? 
index_strs[idx] : ""); } struct grain_obj_av1 { // LUT objects for the offsets, grain and scaling luts pl_shader_obj lut_offsets; pl_shader_obj lut_grain[2]; pl_shader_obj lut_scaling[3]; // Previous parameters used to check reusability struct pl_film_grain_data data; struct pl_color_repr repr; bool fg_has_y; bool fg_has_u; bool fg_has_v; // Space to store the temporary arrays, reused uint32_t *offsets; float grain[2][GRAIN_HEIGHT_LUT][GRAIN_WIDTH_LUT]; int16_t grain_tmp_y[GRAIN_HEIGHT][GRAIN_WIDTH]; int16_t grain_tmp_uv[GRAIN_HEIGHT][GRAIN_WIDTH]; }; static void av1_grain_uninit(pl_gpu gpu, void *ptr) { struct grain_obj_av1 *obj = ptr; pl_shader_obj_destroy(&obj->lut_offsets); for (int i = 0; i < PL_ARRAY_SIZE(obj->lut_grain); i++) pl_shader_obj_destroy(&obj->lut_grain[i]); for (int i = 0; i < PL_ARRAY_SIZE(obj->lut_scaling); i++) pl_shader_obj_destroy(&obj->lut_scaling[i]); *obj = (struct grain_obj_av1) {0}; } bool pl_needs_fg_av1(const struct pl_film_grain_params *params) { const struct pl_av1_grain_data *data = ¶ms->data.params.av1; bool has_y = data->num_points_y > 0; bool has_u = data->num_points_uv[0] > 0 || data->chroma_scaling_from_luma; bool has_v = data->num_points_uv[1] > 0 || data->chroma_scaling_from_luma; for (int i = 0; i < 3; i++) { enum pl_channel channel = channel_map(i, params); if (channel == PL_CHANNEL_Y && has_y) return true; if (channel == PL_CHANNEL_CB && has_u) return true; if (channel == PL_CHANNEL_CR && has_v) return true; } return false; } static inline bool av1_grain_data_eq(const struct pl_film_grain_data *da, const struct pl_film_grain_data *db) { const struct pl_av1_grain_data *a = &da->params.av1, *b = &db->params.av1; // Only check the fields that are relevant for grain LUT generation return da->seed == db->seed && a->chroma_scaling_from_luma == b->chroma_scaling_from_luma && a->scaling_shift == b->scaling_shift && a->ar_coeff_lag == b->ar_coeff_lag && a->ar_coeff_shift == b->ar_coeff_shift && a->grain_scale_shift == b->grain_scale_shift && !memcmp(a->ar_coeffs_y, b->ar_coeffs_y, sizeof(a->ar_coeffs_y)) && !memcmp(a->ar_coeffs_uv, b->ar_coeffs_uv, sizeof(a->ar_coeffs_uv)); } static void fill_grain_lut(void *data, const struct sh_lut_params *params) { struct grain_obj_av1 *obj = params->priv; size_t entries = params->width * params->height * params->comps; memcpy(data, obj->grain, entries * sizeof(float)); } bool pl_shader_fg_av1(pl_shader sh, pl_shader_obj *grain_state, const struct pl_film_grain_params *params) { int sub_x = 0, sub_y = 0; int tex_w = params->tex->params.w, tex_h = params->tex->params.h; if (params->luma_tex) { sub_x = params->luma_tex->params.w > tex_w; sub_y = params->luma_tex->params.h > tex_h; } const struct pl_av1_grain_data *data = ¶ms->data.params.av1; bool fg_has_y = data->num_points_y > 0; bool fg_has_u = data->num_points_uv[0] > 0 || data->chroma_scaling_from_luma; bool fg_has_v = data->num_points_uv[1] > 0 || data->chroma_scaling_from_luma; bool tex_is_y = false, tex_is_cb = false, tex_is_cr = false; for (int i = 0; i < 3; i++) { switch (channel_map(i, params)) { case PL_CHANNEL_Y: tex_is_y = true; break; case PL_CHANNEL_CB: tex_is_cb = true; break; case PL_CHANNEL_CR: tex_is_cr = true; break; default: break; }; } if (tex_is_y && (sub_x || sub_y)) { PL_WARN(sh, "pl_film_grain_params.channels includes PL_CHANNEL_Y but " "plane is subsampled, this makes no sense. 
Continuing anyway " "but output is likely incorrect."); } if (!sh_require(sh, PL_SHADER_SIG_NONE, tex_w, tex_h)) return false; pl_gpu gpu = SH_GPU(sh); if (!gpu) { PL_ERR(sh, "AV1 film grain synthesis requires a non-NULL pl_gpu!"); return false; } if (sh_glsl(sh).version < 130) { PL_ERR(sh, "AV1 film grain synthesis requires GLSL >= 130!"); return false; } // Disable generation for unneeded component types fg_has_y &= tex_is_y; fg_has_u &= tex_is_cb; fg_has_v &= tex_is_cr; int bw = BLOCK_SIZE >> sub_x; int bh = BLOCK_SIZE >> sub_y; bool is_compute = sh_try_compute(sh, bw, bh, false, sizeof(uint32_t)); struct grain_obj_av1 *obj; obj = SH_OBJ(sh, grain_state, PL_SHADER_OBJ_AV1_GRAIN, struct grain_obj_av1, av1_grain_uninit); if (!obj) return false; // Note: In theory we could check only the parameters related to luma or // only related to chroma and skip updating for changes to irrelevant // parts, but this is probably not worth it since the seed is expected to // change per frame anyway. bool needs_update = !av1_grain_data_eq(¶ms->data, &obj->data) || !pl_color_repr_equal(params->repr, &obj->repr) || fg_has_y != obj->fg_has_y || fg_has_u != obj->fg_has_u || fg_has_v != obj->fg_has_v; if (needs_update) { // This is needed even for chroma, so statically generate it generate_grain_y(obj->grain[0], obj->grain_tmp_y, params); } ident_t lut[3]; int idx[3] = {-1}; if (fg_has_y) { lut[0] = sh_lut(sh, sh_lut_params( .object = &obj->lut_grain[0], .method = SH_LUT_TEXTURE, .type = PL_VAR_FLOAT, .width = GRAIN_WIDTH_LUT, .height = GRAIN_HEIGHT_LUT, .comps = 1, .update = needs_update, .dynamic = true, .fill = fill_grain_lut, .priv = obj, )); if (!lut[0]) { SH_FAIL(sh, "Failed generating/uploading luma grain LUT!"); return false; } } // Try merging the chroma LUTs into a single texture int chroma_comps = 0; if (fg_has_u) { generate_grain_uv(&obj->grain[chroma_comps][0][0], obj->grain_tmp_uv, obj->grain_tmp_y, PL_CHANNEL_CB, sub_x, sub_y, params); idx[1] = chroma_comps++; } if (fg_has_v) { generate_grain_uv(&obj->grain[chroma_comps][0][0], obj->grain_tmp_uv, obj->grain_tmp_y, PL_CHANNEL_CR, sub_x, sub_y, params); idx[2] = chroma_comps++; } if (chroma_comps > 0) { lut[1] = lut[2] = sh_lut(sh, sh_lut_params( .object = &obj->lut_grain[1], .method = SH_LUT_TEXTURE, .type = PL_VAR_FLOAT, .width = GRAIN_WIDTH_LUT >> sub_x, .height = GRAIN_HEIGHT_LUT >> sub_y, .comps = chroma_comps, .update = needs_update, .dynamic = true, .fill = fill_grain_lut, .priv = obj, )); if (!lut[1]) { SH_FAIL(sh, "Failed generating/uploading chroma grain LUT!"); return false; } if (chroma_comps == 1) idx[1] = idx[2] = -1; } ident_t offsets = sh_lut(sh, sh_lut_params( .object = &obj->lut_offsets, .method = SH_LUT_AUTO, .type = PL_VAR_UINT, .width = PL_ALIGN2(tex_w << sub_x, 128) / 32, .height = PL_ALIGN2(tex_h << sub_y, 128) / 32, .comps = 1, .update = needs_update, .dynamic = true, .fill = generate_offsets, .priv = (void *) ¶ms->data, )); // For the scaling LUTs, we assume they'll be relatively constant // throughout the video so doing some extra work to avoid reinitializing // them constantly is probably worth it. Probably. 
const struct pl_av1_grain_data *obj_data = &obj->data.params.av1; bool scaling_changed = false; if (fg_has_y || data->chroma_scaling_from_luma) { scaling_changed |= data->num_points_y != obj_data->num_points_y; scaling_changed |= memcmp(data->points_y, obj_data->points_y, sizeof(data->points_y)); } if (fg_has_u && !data->chroma_scaling_from_luma) { scaling_changed |= data->num_points_uv[0] != obj_data->num_points_uv[0]; scaling_changed |= memcmp(data->points_uv[0], obj_data->points_uv[0], sizeof(data->points_uv[0])); } if (fg_has_v && !data->chroma_scaling_from_luma) { scaling_changed |= data->num_points_uv[1] != obj_data->num_points_uv[1]; scaling_changed |= memcmp(data->points_uv[1], obj_data->points_uv[1], sizeof(data->points_uv[1])); } ident_t scaling[3] = {0}; for (int i = 0; i < 3; i++) { struct { int num; const uint8_t (*points)[2]; const struct pl_av1_grain_data *data; } priv; priv.data = data; if (i == 0 || data->chroma_scaling_from_luma) { priv.num = data->num_points_y; priv.points = &data->points_y[0]; } else { priv.num = data->num_points_uv[i - 1]; priv.points = &data->points_uv[i - 1][0]; } // Skip scaling for unneeded channels bool has_c[3] = { fg_has_y, fg_has_u, fg_has_v }; if (has_c[i] && priv.num > 0) { scaling[i] = sh_lut(sh, sh_lut_params( .object = &obj->lut_scaling[i], .type = PL_VAR_FLOAT, .width = SCALING_LUT_SIZE, .comps = 1, .linear = true, .update = scaling_changed, .dynamic = true, .fill = generate_scaling, .priv = &priv, )); if (!scaling[i]) { SH_FAIL(sh, "Failed generating/uploading scaling LUTs!"); return false; } } } // Done updating LUTs obj->data = params->data; obj->repr = *params->repr; obj->fg_has_y = fg_has_y; obj->fg_has_u = fg_has_u; obj->fg_has_v = fg_has_v; sh_describe(sh, "AV1 film grain"); GLSL("vec4 color; \n" "// pl_shader_film_grain (AV1) \n" "{ \n" "uvec2 offset; \n" "uvec2 pos; \n" "float val; \n" "float grain; \n"); if (is_compute) { GLSL("uvec2 block_id = gl_WorkGroupID.xy; \n" "uvec2 local_id = gl_LocalInvocationID.xy; \n" "uvec2 global_id = gl_GlobalInvocationID.xy; \n"); } else { GLSL("uvec2 global_id = uvec2(gl_FragCoord); \n" "uvec2 block_id = global_id / uvec2(%d, %d); \n" "uvec2 local_id = global_id - uvec2(%d, %d) * block_id; \n", bw, bh, bw, bh); } // Load the data vector which holds the offsets if (is_compute) { GLSLH("shared uint data; \n"); GLSL("if (gl_LocalInvocationIndex == 0u) \n" " data = uint(%s(block_id)); \n" "barrier(); \n", offsets); } else { GLSL("uint data = uint(%s(block_id)); \n", offsets); } struct grain_scale scale = get_grain_scale(params); pl_color_repr_normalize(params->repr); int bits = PL_DEF(params->repr->bits.color_depth, 8); pl_assert(bits >= 8); ident_t minValue, maxLuma, maxChroma; if (pl_color_levels_guess(params->repr) == PL_COLOR_LEVELS_LIMITED) { float out_scale = (1 << bits) / ((1 << bits) - 1.0); minValue = SH_FLOAT(16 / 256.0 * out_scale); maxLuma = SH_FLOAT(235 / 256.0 * out_scale); maxChroma = SH_FLOAT(240 / 256.0 * out_scale); if (!pl_color_system_is_ycbcr_like(params->repr->sys)) maxChroma = maxLuma; } else { minValue = SH_FLOAT(0.0); maxLuma = SH_FLOAT(1.0); maxChroma = SH_FLOAT(1.0); } // Load the color value of the tex itself ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->tex, .desc = (struct pl_desc) { .name = "tex", .type = PL_DESC_SAMPLED_TEX, }, }); ident_t tex_scale = SH_FLOAT(scale.texture_scale); GLSL("color = vec4(%s) * texelFetch(%s, ivec2(global_id), 0); \n", tex_scale, tex); // If we need access to the external luma plane, load it now if 
(tex_is_cb || tex_is_cr) { GLSL("float averageLuma; \n"); if (tex_is_y) { // We already have the luma channel as part of the pre-sampled color for (int i = 0; i < 3; i++) { if (channel_map(i, params) == PL_CHANNEL_Y) { GLSL("averageLuma = color[%s]; \n", SH_INT(i)); break; } } } else { // Luma channel not present in image, attach it separately pl_assert(params->luma_tex); ident_t luma = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->luma_tex, .desc = (struct pl_desc) { .name = "luma", .type = PL_DESC_SAMPLED_TEX, }, }); GLSL("pos = global_id * uvec2(%du, %du); \n" "averageLuma = %s * texelFetch(%s, ivec2(pos), 0)[%s]; \n", 1 << sub_x, 1 << sub_y, tex_scale, luma, SH_INT(params->luma_comp)); } } ident_t grain_min = SH_FLOAT(scale.grain_min * scale.grain_scale); ident_t grain_max = SH_FLOAT(scale.grain_max * scale.grain_scale); for (int i = 0; i < params->components; i++) { enum pl_channel c = channel_map(i, params); if (c == PL_CHANNEL_NONE) continue; if (!scaling[c]) continue; sample(sh, OFFSET_N, lut[c], idx[c], sub_x, sub_y); GLSL("grain = val; \n"); if (data->overlap) { const char *weights[] = { "vec2(27.0, 17.0)", "vec2(23.0, 22.0)" }; // X-direction overlapping GLSL("if (block_id.x > 0u && local_id.x < %du) { \n" "vec2 w = %s / 32.0; \n" "if (local_id.x == 1u) w.xy = w.yx; \n", 2 >> sub_x, weights[sub_x]); sample(sh, OFFSET_L, lut[c], idx[c], sub_x, sub_y); GLSL("grain = dot(vec2(val, grain), w); \n" "} \n"); // Y-direction overlapping GLSL("if (block_id.y > 0u && local_id.y < %du) { \n" "vec2 w = %s / 32.0; \n" "if (local_id.y == 1u) w.xy = w.yx; \n", 2 >> sub_y, weights[sub_y]); // We need to special-case the top left pixels since these need to // pre-blend the top-left offset block before blending vertically GLSL(" if (block_id.x > 0u && local_id.x < %du) {\n" " vec2 w2 = %s / 32.0; \n" " if (local_id.x == 1u) w2.xy = w2.yx; \n", 2 >> sub_x, weights[sub_x]); sample(sh, OFFSET_TL, lut[c], idx[c], sub_x, sub_y); GLSL(" float tmp = val; \n"); sample(sh, OFFSET_T, lut[c], idx[c], sub_x, sub_y); GLSL(" val = dot(vec2(tmp, val), w2); \n" " } else { \n"); sample(sh, OFFSET_T, lut[c], idx[c], sub_x, sub_y); GLSL(" } \n" "grain = dot(vec2(val, grain), w); \n" "} \n"); // Correctly clip the interpolated grain GLSL("grain = clamp(grain, %s, %s); \n", grain_min, grain_max); } if (c == PL_CHANNEL_Y) { GLSL("color[%d] += %s(color[%d]) * grain; \n" "color[%d] = clamp(color[%d], %s, %s); \n", i, scaling[c], i, i, i, minValue, maxLuma); } else { GLSL("val = averageLuma; \n"); if (!data->chroma_scaling_from_luma) { // We need to load some extra variables for the mixing. Do this // using sh_var instead of hard-coding them to avoid shader // recompilation when these values change. ident_t mult = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("mult"), .data = &(float[2]){ data->uv_mult_luma[c - 1] / 64.0, data->uv_mult[c - 1] / 64.0, }, }); int c_offset = data->uv_offset[c - 1] << (bits - 8); ident_t offset = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("offset"), .data = &(float) { c_offset * scale.grain_scale }, }); GLSL("val = dot(vec2(val, color[%d]), %s); \n", i, mult); GLSL("val += %s; \n", offset); } GLSL("color[%d] += %s(val) * grain; \n" "color[%d] = clamp(color[%d], %s, %s); \n", i, scaling[c], i, i, minValue, maxChroma); } } GLSL("} \n"); return true; } libplacebo-v4.192.1/src/shaders/film_grain_h274.c000066400000000000000000001331071417677245700214420ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "shaders.h" #include "film_grain.h" static const int8_t Gaussian_LUT[2048+4]; static const uint32_t Seed_LUT[256]; static const int8_t R64T[64][64]; static void prng_shift(uint32_t *state) { // Primitive polynomial x^31 + x^3 + 1 (modulo 2) uint32_t x = *state; uint8_t feedback = (x >> 2) ^ (x >> 30); *state = (x << 1) | (feedback & 1u); } static void generate_slice(float *out, size_t out_width, uint8_t h, uint8_t v, int8_t grain[64][64], int16_t tmp[64][64]) { const uint8_t freq_h = ((h + 3) << 2) - 1; const uint8_t freq_v = ((v + 3) << 2) - 1; uint32_t seed = Seed_LUT[h + v * 13]; // Initialize with random gaussian values, using the output array as a // temporary buffer for these intermediate values. // // Note: To make the subsequent matrix multiplication cache friendlier, we // store each *column* of the starting image in a *row* of `grain` for (int y = 0; y <= freq_v; y++) { for (int x = 0; x <= freq_h; x += 4) { uint16_t offset = seed % 2048; grain[x + 0][y] = Gaussian_LUT[offset + 0]; grain[x + 1][y] = Gaussian_LUT[offset + 1]; grain[x + 2][y] = Gaussian_LUT[offset + 2]; grain[x + 3][y] = Gaussian_LUT[offset + 3]; prng_shift(&seed); } } grain[0][0] = 0; // 64x64 inverse integer transform for (int y = 0; y < 64; y++) { for (int x = 0; x <= freq_h; x++) { int32_t sum = 0; for (int p = 0; p <= freq_v; p++) sum += R64T[y][p] * grain[x][p]; tmp[y][x] = (sum + 128) >> 8; } } for (int y = 0; y < 64; y++) { for (int x = 0; x < 64; x++) { int32_t sum = 0; for (int p = 0; p <= freq_h; p++) sum += tmp[y][p] * R64T[x][p]; // R64T^T = R64 sum = (sum + 128) >> 8; grain[y][x] = PL_CLAMP(sum, -127, 127); } } static const uint8_t deblock_factors[13] = { 64, 71, 77, 84, 90, 96, 103, 109, 116, 122, 128, 128, 128 }; // Deblock horizontal edges by simple attentuation of values const uint8_t deblock_coeff = deblock_factors[v]; for (int y = 0; y < 64; y++) { switch (y % 8) { case 0: case 7: // Deblock for (int x = 0; x < 64; x++) out[x] = ((grain[y][x] * deblock_coeff) >> 7) / 255.0; break; case 1: case 2: case 3: case 4: case 5: case 6: // No deblock for (int x = 0; x < 64; x++) out[x] = grain[y][x] / 255.0; break; default: pl_unreachable(); } out += out_width; } } static void fill_grain_lut(void *data, const struct sh_lut_params *params) { struct { int8_t grain[64][64]; int16_t tmp[64][64]; } *tmp = pl_alloc_ptr(NULL, tmp); float *out = data; assert(params->type == PL_VAR_FLOAT); for (int h = 0; h < 13; h++) { for (int v = 0; v < 13; v++) { float *slice = out + (h * 64) * params->width + (v * 64); generate_slice(slice, params->width, h, v, tmp->grain, tmp->tmp); } } pl_free(tmp); } bool pl_needs_fg_h274(const struct pl_film_grain_params *params) { const struct pl_h274_grain_data *data = ¶ms->data.params.h274; if (data->model_id != 0) return false; for (int i = 0; i < 3; i++) { enum pl_channel channel = channel_map(i, params); if (channel < 0 || channel >= 3) continue; if 
(data->component_model_present[channel]) return true; } return false; } bool pl_shader_fg_h274(pl_shader sh, pl_shader_obj *grain_state, const struct pl_film_grain_params *params) { if (!sh_require(sh, PL_SHADER_SIG_NONE, params->tex->params.w, params->tex->params.h)) return false; size_t shmem_req = 0; ident_t group_sum = NULL; const struct pl_glsl_version glsl = sh_glsl(sh); if (glsl.subgroup_size < 8*8) { group_sum = sh_fresh(sh, "group_sum"); shmem_req += sizeof(int); GLSLH("shared int %s; \n", group_sum); GLSL("%s = 0; barrier(); \n", group_sum); } if (!sh_try_compute(sh, 8, 8, false, shmem_req) || glsl.version < 130) { SH_FAIL(sh, "H.274 film grain synthesis requires compute shaders!"); return false; } ident_t db = sh_lut(sh, sh_lut_params( .object = grain_state, .method = SH_LUT_TEXTURE, .type = PL_VAR_FLOAT, .width = 13 * 64, .height = 13 * 64, .comps = 1, .fill = fill_grain_lut, )); sh_describe(sh, "H.274 film grain"); GLSL("vec4 color; \n" "// pl_shader_film_grain (H.274) \n" "{ \n"); // Load the color value of the tex itself ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->tex, .desc = (struct pl_desc) { .name = "tex", .type = PL_DESC_SAMPLED_TEX, }, }); GLSL("color = vec4(%s) * texelFetch(%s, ivec2(gl_GlobalInvocationID), 0); \n", SH_FLOAT(pl_color_repr_normalize(params->repr)), tex); const struct pl_h274_grain_data *data = ¶ms->data.params.h274; ident_t scale_factor = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("scale_factor"), .data = &(float){ 1.0 / (1 << (data->log2_scale_factor + 6)) }, }); // pcg3d (http://www.jcgt.org/published/0009/03/02/) GLSL("uvec3 pcg = uvec3(%s, gl_WorkGroupID.xy / 2u); \n" "pcg = pcg * 1664525u + 1013904223u; \n" "pcg.x += pcg.y * pcg.z; \n" "pcg.y += pcg.z * pcg.x; \n" "pcg.z += pcg.x * pcg.y; \n" "pcg ^= pcg >> 16u; \n" "pcg.x += pcg.y * pcg.z; \n" "pcg.y += pcg.z * pcg.x; \n" "pcg.z += pcg.x * pcg.y; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_uint("seed"), .data = &(unsigned int){ params->data.seed }, })); for (int idx = 0; idx < params->components; idx++) { enum pl_channel c = channel_map(idx, params); if (c == PL_CHANNEL_NONE) continue; if (!data->component_model_present[c]) continue; GLSL("// component %d\n{\n", c); // Compute the local 8x8 average GLSL("float avg = color[%d] / 64.0; \n", c); const int precision = 10000000; if (glsl.subgroup_size) { GLSL("avg = subgroupAdd(avg); \n"); if (glsl.subgroup_size < 8*8) { GLSL("if (subgroupElect()) \n" " atomicAdd(%s, int(avg * %d.0)); \n" "barrier(); \n" "avg = float(%s) / %d.0; \n", group_sum, precision, group_sum, precision); } } else { GLSL("atomicAdd(%s, int(avg * %d.0)); \n" "barrier(); \n" "avg = float(%s) / %d.0; \n", group_sum, precision, group_sum, precision); } // Hard-coded unrolled loop, to avoid having to load a dynamically // sized array into the shader - and to optimize for the very common // case of there only being a single intensity interval GLSL("uvec3 values; \n"); for (int i = 0; i < data->num_intensity_intervals[c]; i++) { ident_t bounds = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("bounds"), .data = &(float[2]) { data->intensity_interval_lower_bound[c][i] / 255.0, data->intensity_interval_upper_bound[c][i] / 255.0, }, }); ident_t values = sh_var(sh, (struct pl_shader_var) { .var = pl_var_uvec3("comp_model_value"), .data = &(unsigned int[3]) { data->comp_model_value[c][i][0], data->comp_model_value[c][i][1] - 2, data->comp_model_value[c][i][2] - 2, }, }); GLSL("if (avg >= %s.x && avg <= %s.y) \n" " values = %s; 
else \n", bounds, bounds, values); } GLSL(" values = uvec3(0u); \n"); // Extract the grain parameters from comp_model_value GLSL("uvec2 offset = 64u * values.yz; \n" "float scale = %s * float(values.x); \n" // Add randomness "uint rand = pcg[%d]; \n" "offset.y += (rand >> 16u) %% 52u; \n" "offset.x += (rand & 0xFFFFu) %% 56u; \n" "if ((rand & 1u) == 1u) scale = -scale; \n" // Add local offset and compute grain "offset += 8u * (gl_WorkGroupID.xy %% 2u); \n" "offset += gl_LocalInvocationID.xy; \n" "float grain = %s(offset); \n" "color[%d] += scale * grain; \n", scale_factor, c, db, c); // TODO: Deblocking? GLSL("}\n"); } GLSL("} \n"); return true; } // These tables are all taken from the SMPTE RDD 5-2006 specification static const int8_t Gaussian_LUT[2048+4] = { -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, -51, 21, 13, -11, -20, -19, 33, -127, 17, -6, -105, 18, 19, 71, 48, -10, -38, 42, -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, -1, 120, -127, -108, -49, 9, 14, 127, 122, 109, 52, 127, 2, 7, 114, 19, 30, 12, 77, 112, 82, -61, -127, 111, -52, -29, 2, -49, -24, 58, -29, -73, 12, 112, 67, 79, -3, -114, -87, -6, -5, 40, 58, -81, 49, -27, -31, -34, -105, 50, 16, -24, -35, -14, -15, -127, -55, -22, -55, -127, -112, 5, -26, -72, 127, 127, -2, 41, 87, -65, -16, 55, 19, 91, -81, -65, -64, 35, -7, -54, 99, -7, 88, 125, -26, 91, 0, 63, 60, -14, -23, 113, -33, 116, 14, 26, 51, -16, 107, -8, 53, 38, -34, 17, -7, 4, -91, 6, 63, 63, -15, 39, -36, 19, 55, 17, -51, 40, 33, -37, 126, -39, -118, 17, -30, 0, 19, 98, 60, 101, -12, -73, -17, -52, 98, 3, 3, 60, 33, -3, -2, 10, -42, -106, -38, 14, 127, 16, -127, -31, -86, -39, -56, 46, -41, 75, 23, -19, -22, -70, 74, -54, -2, 32, -45, 17, -92, 59, -64, -67, 56, -102, -29, -87, -34, -92, 68, 5, -74, -61, 93, -43, 14, -26, -38, -126, -17, 16, -127, 64, 34, 31, 93, 17, -51, -59, 71, 77, 81, 127, 127, 61, 33, -106, -93, 0, 0, 75, -69, 71, 127, -19, -111, 30, 23, 15, 2, 39, 92, 5, 42, 2, -6, 38, 15, 114, -30, -37, 50, 44, 106, 27, 119, 7, -80, 25, -68, -21, 92, -11, -1, 18, 41, -50, 79, -127, -43, 127, 18, 11, -21, 32, -52, 27, -88, -90, -39, -19, -10, 24, -118, 72, -24, -44, 2, 12, 86, -107, 39, -33, -127, 47, 51, -24, -22, 46, 0, 15, -35, -69, -2, -74, 24, -6, 0, 29, -3, 45, 32, -32, 117, -45, 79, -24, -17, -109, -10, -70, 88, -48, 24, -91, 120, -37, 50, -127, 58, 32, -82, -10, -17, -7, 46, -127, -15, 89, 127, 17, 98, -39, -33, 37, 42, -40, -32, -21, 105, -19, 19, 19, -59, -9, 30, 0, -127, 34, 127, -84, 75, 24, -40, -49, -127, -107, -14, 45, -75, 1, 30, -20, 41, -68, -40, 12, 127, -3, 5, 20, -73, -59, -127, -3, -3, -53, -6, -119, 93, 120, -80, -50, 0, 20, -46, 67, 78, -12, -22, -127, 36, -41, 56, 119, -5, -116, -22, 68, -14, -90, 24, -82, -44, -127, 107, -25, -37, 40, -7, -7, -82, 5, -87, 44, -34, 9, -127, 39, 70, 49, -63, 74, -49, 109, -27, -89, -47, -39, 44, 49, -4, 60, -42, 80, 9, -127, -9, -56, -49, 125, -66, 47, 36, 117, 15, -11, -96, 109, 94, -17, -56, 70, 8, -14, -5, 50, 37, -45, 120, -30, -76, 40, -46, 6, 3, 69, 17, -78, 1, -79, 6, 127, 43, 26, 127, -127, 28, -55, -26, 55, 112, 48, 107, -1, -77, -1, 53, -9, -22, -43, 123, 108, 127, 102, 68, 46, 5, 1, 123, -13, -55, -34, -49, 89, 65, -105, -5, 94, -53, 62, 45, 30, 46, 18, -35, 15, 41, 47, -98, -24, 94, -75, 127, -114, 127, -68, 1, -17, 51, -95, 47, 12, 34, -45, -75, 89, -107, -9, -58, -29, -109, -24, 127, -61, -13, 77, -45, 17, 19, 83, -24, 9, 127, -66, 54, 4, 26, 13, 111, 43, -113, -22, 10, -24, 83, 67, -14, 75, -123, 59, 127, -12, 99, -19, 64, -38, 54, 9, 7, 61, -56, 
3, -57, 113, -104, -59, 3, -9, -47, 74, 85, -55, -34, 12, 118, 28, 93, -72, 13, -99, -72, -20, 30, 72, -94, 19, -54, 64, -12, -63, -25, 65, 72, -10, 127, 0, -127, 103, -20, -73, -112, -103, -6, 28, -42, -21, -59, -29, -26, 19, -4, -51, 94, -58, -95, -37, 35, 20, -69, 127, -19, -127, -22, -120, -53, 37, 74, -127, -1, -12, -119, -53, -28, 38, 69, 17, 16, -114, 89, 62, 24, 37, -23, 49, -101, -32, -9, -95, -53, 5, 93, -23, -49, -8, 51, 3, -75, -90, -10, -39, 127, -86, -22, 20, 20, 113, 75, 52, -31, 92, -63, 7, -12, 46, 36, 101, -43, -17, -53, -7, -38, -76, -31, -21, 62, 31, 62, 20, -127, 31, 64, 36, 102, -85, -10, 77, 80, 58, -79, -8, 35, 8, 80, -24, -9, 3, -17, 72, 127, 83, -87, 55, 18, -119, -123, 36, 10, 127, 56, -55, 113, 13, 26, 32, -13, -48, 22, -13, 5, 58, 27, 24, 26, -11, -36, 37, -92, 78, 81, 9, 51, 14, 67, -13, 0, 32, 45, -76, 32, -39, -22, -49, -127, -27, 31, -9, 36, 14, 71, 13, 57, 12, -53, -86, 53, -44, -35, 2, 127, 12, -66, -44, 46, -115, 3, 10, 56, -35, 119, -19, -61, 52, -59, -127, -49, -23, 4, -5, 17, -82, -6, 127, 25, 79, 67, 64, -25, 14, -64, -37, -127, -28, 21, -63, 66, -53, -41, 109, -62, 15, -22, 13, 29, -63, 20, 27, 95, -44, -59, -116, -10, 79, -49, 22, -43, -16, 46, -47, -120, -36, -29, -52, -44, 29, 127, -13, 49, -9, -127, 75, -28, -23, 88, 59, 11, -95, 81, -59, 58, 60, -26, 40, -92, -3, -22, -58, -45, -59, -22, -53, 71, -29, 66, -32, -23, 14, -17, -66, -24, -28, -62, 47, 38, 17, 16, -37, -24, -11, 8, -27, -19, 59, 45, -49, -47, -4, -22, -81, 30, -67, -127, 74, 102, 5, -18, 98, 34, -66, 42, -52, 7, -59, 24, -58, -19, -24, -118, -73, 91, 15, -16, 79, -32, -79, -127, -36, 41, 77, -83, 2, 56, 22, -75, 127, -16, -21, 12, 31, 56, -113, -127, 90, 55, 61, 12, 55, -14, -113, -14, 32, 49, -67, -17, 91, -10, 1, 21, 69, -70, 99, -19, -112, 66, -90, -10, -9, -71, 127, 50, -81, -49, 24, 61, -61, -111, 7, -41, 127, 88, -66, 108, -127, -6, 36, -14, 41, -50, 14, 14, 73, -101, -28, 77, 127, -8, -100, 88, 38, 121, 88, -125, -60, 13, -94, -115, 20, -67, -87, -94, -119, 44, -28, -30, 18, 5, -53, -61, 20, -43, 11, -77, -60, 13, 29, 3, 6, -72, 38, -60, -11, 108, -53, 41, 66, -12, -127, -127, -49, 24, 29, 46, 36, 91, 34, -33, 116, -51, -34, -52, 91, 7, -83, 73, -26, -103, 24, -10, 76, 84, 5, 68, -80, -13, -17, -32, -48, 20, 50, 26, 10, 63, -104, -14, 37, 127, 114, 97, 35, 1, -33, -55, 127, -124, -33, 61, -7, 119, -32, -127, -53, -42, 63, 3, -5, -26, 70, -58, -33, -44, -43, 34, -56, -127, 127, 25, -35, -11, 16, -81, 29, -58, 40, -127, -127, 20, -47, -11, -36, -63, -52, -32, -82, 78, -76, -73, 8, 27, -72, -9, -74, -85, -86, -57, 25, 78, -10, -97, 35, -65, 8, -59, 14, 1, -42, 32, -88, -44, 17, -3, -9, 59, 40, 12, -108, -40, 24, 34, 18, -28, 2, 51, -110, -4, 100, 1, 65, 22, 0, 127, 61, 45, 25, -31, 6, 9, -7, -48, 99, 16, 44, -2, -40, 32, -39, -52, 10, -110, -19, 56, -127, 69, 26, 51, 92, 40, 61, -52, 45, -38, 13, 85, 122, 27, 66, 45, -111, -83, -3, 31, 37, 19, -36, 58, 71, 39, -78, -47, 58, -78, 8, -62, -36, -14, 61, 42, -127, 71, -4, 24, -54, 52, -127, 67, -4, -42, 30, -63, 59, -3, -1, -18, -46, -92, -81, -96, -14, -53, -10, -11, -77, 13, 1, 8, -67, -127, 127, -28, 26, -14, 18, -13, -26, 2, 10, -46, -32, -15, 27, -31, -59, 59, 77, -121, 28, 40, -54, -62, -31, -21, -37, -32, -6, -127, -25, -60, 70, -127, 112, -127, 127, 88, -7, 116, 110, 53, 87, -127, 3, 16, 23, 74, -106, -51, 3, 74, -82, -112, -74, 65, 81, 25, 53, 127, -45, -50, -103, -41, -65, -29, 79, -67, 64, -33, -30, -8, 127, 0, -13, -51, 67, -14, 5, -92, 29, -35, -8, -90, -57, -3, 36, 43, 44, -31, -69, -7, 36, 39, -51, 43, -81, 58, 6, 
127, 12, 57, 66, 46, 59, -43, -42, 41, -15, -120, 24, 3, -11, 19, -13, 51, 28, 3, 55, -48, -12, -1, 2, 97, -19, 29, 42, 13, 43, 78, -44, 56, -108, -43, -19, 127, 15, -11, -18, -81, 83, -37, 77, -109, 15, 65, -50, 43, 12, 13, 27, 28, 61, 57, 30, 26, 106, -18, 56, 13, 97, 4, -8, -62, -103, 94, 108, -44, 52, 27, -47, -9, 105, -53, 46, 89, 103, -33, 38, -34, 55, 51, 70, -94, -35, -87, -107, -19, -31, 9, -19, 79, -14, 77, 5, -19, -107, 85, 21, -45, -39, -42, 9, -29, 74, 47, -75, 60, -127, 120, -112, -57, -32, 41, 7, 79, 76, 66, 57, 41, -25, 31, 37, -47, -36, 43, -73, -37, 63, 127, -69, -52, 90, -33, -61, 60, -55, 44, 15, 4, -67, 13, -92, 64, 29, -39, -3, 83, -2, -38, -85, -86, 58, 35, -69, -61, 29, -37, -95, -78, 4, 30, -4, -32, -80, -22, -9, -77, 46, 7, -93, -71, 65, 9, -50, 127, -70, 26, -12, -39, -114, 63, -127, -100, 4, -32, 111, 22, -60, 65, -101, 26, -42, 21, -59, -27, -74, 2, -94, 6, 126, 5, 76, -88, -9, -43, -101, 127, 1, 125, 92, -63, 52, 56, 4, 81, -127, 127, 80, 127, -29, 30, 116, -74, -17, -57, 105, 48, 45, 25, -72, 48, -38, -108, 31, -34, 4, -11, 41, -127, 52, -104, -43, -37, 52, 2, 47, 87, -9, 77, 27, -41, -25, 90, 86, -56, 75, 10, 33, 78, 58, 127, 127, -7, -73, 49, -33, -106, -35, 38, 57, 53, -17, -4, 83, 52, -108, 54, -125, 28, 23, 56, -43, -88, -17, -6, 47, 23, -9, 0, -13, 111, 75, 27, -52, -38, -34, 39, 30, 66, 39, 38, -64, 38, 3, 21, -32, -51, -28, 54, -38, -87, 20, 52, 115, 18, -81, -70, 0, -14, -46, -46, -3, 125, 16, -14, 23, -82, -84, -69, -20, -65, -127, 9, 81, -49, 61, 7, -36, -45, -42, 57, -26, 47, 20, -85, 46, -13, 41, -37, -75, -60, 86, -78, -127, 12, 50, 2, -3, 13, 47, 5, 19, -78, -55, -27, 65, -71, 12, -108, 20, -16, 11, -31, 63, -55, 37, 75, -17, 127, -73, -33, -28, -120, 105, 68, 106, -103, -106, 71, 61, 2, 23, -3, 33, -5, -15, -67, -15, -23, -54, 15, -63, 76, 58, -110, 1, 83, -27, 22, 75, -39, -17, -11, 64, -17, -127, -54, -66, 31, 96, 116, 3, -114, -7, -108, -63, 97, 9, 50, 8, 75, -28, 72, 112, -36, -112, 95, -50, 23, -13, -19, 55, 21, 23, 92, 91, 22, -49, 16, -75, 23, 9, -49, -97, -37, 49, -36, 36, -127, -86, 43, 127, -24, -24, 84, 83, -35, -34, -12, 109, 102, -38, 51, -68, 34, 19, -22, 49, -32, 127, 40, 24, -93, -4, -3, 105, 3, -58, -18, 8, 127, -18, 125, 68, 69, -62, 30, -36, 54, -57, -24, 17, 43, -36, -27, -57, -67, -21, -10, -49, 68, 12, 65, 4, 48, 55, 127, -75, 44, 89, -66, -13, -78, -82, -91, 22, 30, 33, -40, -87, -34, 96, -91, 39, 10, -64, -3, -12, 127, -50, -37, -56, 23, -35, -36, -54, 90, -91, 2, 50, 77, -6, -127, 16, 46, -5, -73, 0, -56, -18, -72, 28, 93, 60, 49, 20, 18, 111, -111, 32, -83, 47, 47, -10, 35, -88, 43, 57, -98, 127, -17, 0, 1, -39, -127, -2, 0, 63, 93, 0, 36, -66, -61, -19, 39, -127, 58, 50, -17, 127, 88, -43, -108, -51, -16, 7, -36, 68, 46, -14, 107, 40, 57, 7, 19, 8, 3, 88, -90, -92, -18, -21, -24, 13, 7, -4, -78, -91, -4, 8, -35, -5, 19, 2, -111, 4, -66, -81, 122, -20, -34, -37, -84, 127, 68, 46, 17, 47, // Repeat the beginning of the array to allow wrapping reads -11, 12, 103, -11, }; static const uint32_t Seed_LUT[256] = { 747538460, 1088979410, 1744950180, 1767011913, 1403382928, 521866116, 1060417601, 2110622736, 1557184770, 105289385, 585624216, 1827676546, 1191843873, 1018104344, 1123590530, 663361569, 2023850500, 76561770, 1226763489, 80325252, 1992581442, 502705249, 740409860, 516219202, 557974537, 1883843076, 720112066, 1640137737, 1820967556, 40667586, 155354121, 1820967557, 1115949072, 1631803309, 98284748, 287433856, 2119719977, 988742797, 1827432592, 579378475, 1017745956, 1309377032, 1316535465, 2074315269, 1923385360, 
209722667, 1546228260, 168102420, 135274561, 355958469, 248291472, 2127839491, 146920100, 585982612, 1611702337, 696506029, 1386498192, 1258072451, 1212240548, 1043171860, 1217404993, 1090770605, 1386498193, 169093201, 541098240, 1468005469, 456510673, 1578687785, 1838217424, 2010752065, 2089828354, 1362717428, 970073673, 854129835, 714793201, 1266069081, 1047060864, 1991471829, 1098097741, 913883585, 1669598224, 1337918685, 1219264706, 1799741108, 1834116681, 683417731, 1120274457, 1073098457, 1648396544, 176642749, 31171789, 718317889, 1266977808, 1400892508, 549749008, 1808010512, 67112961, 1005669825, 903663673, 1771104465, 1277749632, 1229754427, 950632997, 1979371465, 2074373264, 305357524, 1049387408, 1171033360, 1686114305, 2147468765, 1941195985, 117709841, 809550080, 991480851, 1816248997, 1561503561, 329575568, 780651196, 1659144592, 1910793616, 604016641, 1665084765, 1530186961, 1870928913, 809550081, 2079346113, 71307521, 876663040, 1073807360, 832356664, 1573927377, 204073344, 2026918147, 1702476788, 2043881033, 57949587, 2001393952, 1197426649, 1186508931, 332056865, 950043140, 890043474, 349099312, 148914948, 236204097, 2022643605, 1441981517, 498130129, 1443421481, 924216797, 1817491777, 1913146664, 1411989632, 929068432, 495735097, 1684636033, 1284520017, 432816184, 1344884865, 210843729, 676364544, 234449232, 12112337, 1350619139, 1753272996, 2037118872, 1408560528, 533334916, 1043640385, 357326099, 201376421, 110375493, 541106497, 416159637, 242512193, 777294080, 1614872576, 1535546636, 870600145, 910810409, 1821440209, 1605432464, 1145147393, 951695441, 1758494976, 1506656568, 1557150160, 608221521, 1073840384, 217672017, 684818688, 1750138880, 16777217, 677990609, 953274371, 1770050213, 1359128393, 1797602707, 1984616737, 1865815816, 2120835200, 2051677060, 1772234061, 1579794881, 1652821009, 1742099468, 1887260865, 46468113, 1011925248, 1134107920, 881643832, 1354774993, 472508800, 1892499769, 1752793472, 1962502272, 687898625, 883538000, 1354355153, 1761673473, 944820481, 2020102353, 22020353, 961597696, 1342242816, 964808962, 1355809701, 17016649, 1386540177, 647682692, 1849012289, 751668241, 1557184768, 127374604, 1927564752, 1045744913, 1614921984, 43588881, 1016185088, 1544617984, 1090519041, 136122424, 215038417, 1563027841, 2026918145, 1688778833, 701530369, 1372639488, 1342242817, 2036945104, 953274369, 1750192384, 16842753, 964808960, 1359020032, 1358954497 }; // Note: This is pre-transposed, i.e. 
stored column-major order static const int8_t R64T[64][64] = { { 32, 45, 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38, 37, 36, 36, 35, 34, 34, 33, 32, 31, 30, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 4, 3, 2, 1, }, { 32, 45, 45, 44, 43, 42, 41, 39, 38, 36, 34, 31, 29, 26, 23, 20, 17, 14, 11, 8, 4, 1, -2, -6, -9, -12, -15, -18, -21, -24, -27, -30, -32, -34, -36, -38, -40, -41, -43, -44, -44, -45, -45, -45, -45, -45, -44, -43, -42, -40, -39, -37, -35, -33, -30, -28, -25, -22, -19, -16, -13, -10, -7, -3, }, { 32, 45, 44, 42, 40, 37, 34, 30, 25, 20, 15, 10, 4, -1, -7, -12, -17, -22, -27, -31, -35, -38, -41, -43, -44, -45, -45, -45, -43, -41, -39, -36, -32, -28, -23, -18, -13, -8, -2, 3, 9, 14, 19, 24, 29, 33, 36, 39, 42, 44, 45, 45, 45, 44, 43, 40, 38, 34, 30, 26, 21, 16, 11, 6, }, { 32, 45, 43, 39, 35, 30, 23, 16, 9, 1, -7, -14, -21, -28, -34, -38, -42, -44, -45, -45, -43, -40, -36, -31, -25, -18, -11, -3, 4, 12, 19, 26, 32, 37, 41, 44, 45, 45, 44, 41, 38, 33, 27, 20, 13, 6, -2, -10, -17, -24, -30, -36, -40, -43, -45, -45, -44, -42, -39, -34, -29, -22, -15, -8, }, { 32, 44, 41, 36, 29, 20, 11, 1, -9, -18, -27, -34, -40, -44, -45, -45, -42, -37, -30, -22, -13, -3, 7, 16, 25, 33, 39, 43, 45, 45, 43, 38, 32, 24, 15, 6, -4, -14, -23, -31, -38, -42, -45, -45, -43, -39, -34, -26, -17, -8, 2, 12, 21, 30, 36, 41, 44, 45, 44, 40, 35, 28, 19, 10, }, { 32, 44, 39, 31, 21, 10, -2, -14, -25, -34, -41, -45, -45, -42, -36, -28, -17, -6, 7, 18, 29, 37, 43, 45, 44, 40, 34, 24, 13, 1, -11, -22, -32, -39, -44, -45, -43, -38, -30, -20, -9, 3, 15, 26, 35, 41, 45, 45, 42, 36, 27, 16, 4, -8, -19, -30, -38, -43, -45, -44, -40, -33, -23, -12, }, { 32, 43, 36, 26, 13, -1, -15, -28, -38, -44, -45, -42, -35, -24, -11, 3, 17, 30, 39, 44, 45, 41, 34, 22, 9, -6, -19, -31, -40, -45, -45, -40, -32, -20, -7, 8, 21, 33, 41, 45, 44, 39, 30, 18, 4, -10, -23, -34, -42, -45, -44, -38, -29, -16, -2, 12, 25, 36, 43, 45, 43, 37, 27, 14, }, { 32, 42, 34, 20, 4, -12, -27, -38, -44, -45, -39, -28, -13, 3, 19, 33, 42, 45, 43, 34, 21, 6, -11, -26, -38, -44, -45, -39, -29, -14, 2, 18, 32, 41, 45, 43, 35, 22, 7, -10, -25, -37, -44, -45, -40, -30, -15, 1, 17, 31, 41, 45, 43, 36, 23, 8, -9, -24, -36, -44, -45, -40, -30, -16, }, { 32, 41, 30, 14, -4, -22, -36, -44, -44, -37, -23, -6, 13, 30, 41, 45, 42, 31, 15, -3, -21, -36, -44, -45, -38, -24, -7, 12, 29, 40, 45, 42, 32, 16, -2, -20, -35, -44, -45, -38, -25, -8, 11, 28, 40, 45, 43, 33, 17, -1, -19, -34, -43, -45, -39, -26, -9, 10, 27, 39, 45, 43, 34, 18, }, { 32, 40, 27, 8, -13, -31, -43, -45, -38, -22, -2, 18, 35, 44, 44, 34, 17, -3, -23, -38, -45, -42, -30, -12, 9, 28, 41, 45, 40, 26, 7, -14, -32, -43, -45, -37, -21, -1, 19, 36, 44, 44, 34, 16, -4, -24, -39, -45, -42, -30, -11, 10, 29, 41, 45, 39, 25, 6, -15, -33, -43, -45, -36, -20, }, { 32, 39, 23, 1, -21, -38, -45, -40, -25, -3, 19, 37, 45, 41, 27, 6, -17, -36, -45, -42, -29, -8, 15, 34, 44, 43, 30, 10, -13, -33, -44, -44, -32, -12, 11, 31, 43, 44, 34, 14, -9, -30, -43, -45, -35, -16, 7, 28, 42, 45, 36, 18, -4, -26, -41, -45, -38, -20, 2, 24, 40, 45, 39, 22, }, { 32, 38, 19, -6, -29, -43, -44, -31, -9, 16, 36, 45, 40, 22, -2, -26, -42, -45, -34, -12, 13, 34, 45, 41, 25, 1, -23, -40, -45, -36, -15, 10, 32, 44, 43, 28, 4, -20, -39, -45, -38, -18, 7, 30, 43, 44, 30, 8, -17, -37, -45, -39, -21, 3, 27, 42, 44, 33, 11, -14, -35, -45, -41, -24, }, { 32, 37, 15, -12, -35, -45, -39, -18, 9, 33, 45, 40, 21, -6, -30, -44, -42, -24, 2, 28, 43, 43, 
27, 1, -25, -42, -44, -30, -4, 22, 41, 45, 32, 8, -19, -39, -45, -34, -11, 16, 38, 45, 36, 14, -13, -36, -45, -38, -17, 10, 34, 45, 40, 20, -7, -31, -44, -41, -23, 3, 29, 44, 43, 26, }, { 32, 36, 11, -18, -40, -45, -30, -3, 25, 43, 43, 24, -4, -31, -45, -39, -17, 12, 36, 45, 35, 10, -19, -40, -44, -30, -2, 26, 43, 42, 23, -6, -32, -45, -39, -16, 13, 37, 45, 34, 9, -20, -41, -44, -29, -1, 27, 44, 42, 22, -7, -33, -45, -38, -15, 14, 38, 45, 34, 8, -21, -41, -44, -28, }, { 32, 34, 7, -24, -43, -41, -19, 12, 38, 45, 30, 1, -29, -45, -39, -14, 17, 40, 44, 26, -4, -33, -45, -36, -9, 22, 43, 42, 21, -10, -36, -45, -32, -3, 27, 44, 40, 16, -15, -39, -44, -28, 2, 31, 45, 37, 11, -20, -42, -43, -23, 8, 35, 45, 34, 6, -25, -44, -41, -18, 13, 38, 45, 30, }, { 32, 33, 2, -30, -45, -36, -7, 26, 44, 38, 11, -22, -43, -40, -15, 18, 42, 42, 19, -14, -40, -44, -23, 10, 38, 45, 27, -6, -35, -45, -30, 1, 32, 45, 34, 3, -29, -45, -36, -8, 25, 44, 39, 12, -21, -43, -41, -16, 17, 41, 43, 20, -13, -39, -44, -24, 9, 37, 45, 28, -4, -34, -45, -31, }, { 32, 31, -2, -34, -45, -28, 7, 37, 44, 24, -11, -39, -43, -20, 15, 41, 42, 16, -19, -43, -40, -12, 23, 44, 38, 8, -27, -45, -35, -3, 30, 45, 32, -1, -34, -45, -29, 6, 36, 45, 25, -10, -39, -44, -21, 14, 41, 42, 17, -18, -43, -40, -13, 22, 44, 38, 9, -26, -45, -36, -4, 30, 45, 33, }, { 32, 30, -7, -38, -43, -18, 19, 44, 38, 6, -30, -45, -29, 8, 39, 43, 17, -20, -44, -37, -4, 31, 45, 28, -9, -39, -43, -16, 21, 44, 36, 3, -32, -45, -27, 10, 40, 42, 15, -22, -44, -36, -2, 33, 45, 26, -11, -40, -42, -14, 23, 45, 35, 1, -34, -45, -25, 12, 41, 41, 13, -24, -45, -34, }, { 32, 28, -11, -41, -40, -8, 30, 45, 25, -14, -43, -38, -4, 33, 45, 22, -17, -44, -36, -1, 35, 44, 19, -20, -44, -34, 2, 37, 43, 16, -23, -45, -32, 6, 39, 42, 13, -26, -45, -30, 9, 40, 41, 10, -29, -45, -27, 12, 42, 39, 7, -31, -45, -24, 15, 43, 38, 3, -34, -45, -21, 18, 44, 36, }, { 32, 26, -15, -44, -35, 3, 39, 41, 9, -31, -45, -20, 21, 45, 30, -10, -42, -38, -2, 36, 43, 14, -27, -45, -25, 16, 44, 34, -4, -39, -41, -8, 32, 45, 19, -22, -45, -30, 11, 42, 38, 1, -36, -43, -13, 28, 45, 24, -17, -44, -34, 6, 40, 40, 7, -33, -44, -18, 23, 45, 29, -12, -43, -37, }, { 32, 24, -19, -45, -29, 14, 44, 33, -9, -42, -36, 3, 40, 39, 2, -37, -42, -8, 34, 44, 13, -30, -45, -18, 25, 45, 23, -20, -45, -28, 15, 44, 32, -10, -43, -36, 4, 40, 39, 1, -38, -41, -7, 34, 43, 12, -30, -45, -17, 26, 45, 22, -21, -45, -27, 16, 44, 31, -11, -43, -35, 6, 41, 38, }, { 32, 22, -23, -45, -21, 24, 45, 20, -25, -45, -19, 26, 45, 18, -27, -45, -17, 28, 45, 16, -29, -45, -15, 30, 44, 14, -30, -44, -13, 31, 44, 12, -32, -44, -11, 33, 43, 10, -34, -43, -9, 34, 43, 8, -35, -42, -7, 36, 42, 6, -36, -41, -4, 37, 41, 3, -38, -40, -2, 38, 40, 1, -39, -39, }, { 32, 20, -27, -45, -13, 33, 43, 6, -38, -39, 2, 41, 35, -10, -44, -30, 17, 45, 23, -24, -45, -16, 30, 44, 9, -36, -41, -1, 40, 37, -7, -43, -32, 14, 45, 26, -21, -45, -19, 28, 44, 12, -34, -42, -4, 38, 39, -3, -42, -34, 11, 44, 29, -18, -45, -22, 25, 45, 15, -31, -43, -8, 36, 40, }, { 32, 18, -30, -43, -4, 39, 36, -10, -44, -26, 23, 45, 13, -34, -41, 1, 42, 33, -15, -45, -21, 28, 44, 8, -38, -38, 7, 44, 29, -20, -45, -16, 32, 42, 2, -40, -35, 12, 45, 24, -25, -45, -11, 36, 40, -3, -43, -31, 17, 45, 19, -30, -43, -6, 39, 37, -9, -44, -27, 22, 45, 14, -34, -41, }, { 32, 16, -34, -40, 4, 44, 27, -24, -44, -8, 39, 36, -13, -45, -19, 31, 42, -1, -43, -30, 21, 45, 11, -37, -38, 10, 45, 22, -29, -43, -2, 41, 32, -18, -45, -14, 35, 39, -7, -44, -25, 26, 44, 6, -40, -34, 15, 45, 17, -33, -41, 3, 43, 28, 
-23, -45, -9, 38, 36, -12, -45, -20, 30, 42, }, { 32, 14, -36, -37, 13, 45, 15, -36, -38, 12, 45, 16, -35, -38, 11, 45, 17, -34, -39, 10, 45, 18, -34, -39, 9, 45, 19, -33, -40, 8, 45, 20, -32, -40, 7, 45, 21, -31, -41, 6, 44, 22, -30, -41, 4, 44, 23, -30, -42, 3, 44, 24, -29, -42, 2, 44, 25, -28, -43, 1, 43, 26, -27, -43, }, { 32, 12, -39, -33, 21, 44, 2, -43, -25, 30, 41, -8, -45, -16, 36, 36, -17, -45, -7, 41, 29, -26, -43, 3, 44, 20, -34, -38, 13, 45, 11, -39, -32, 22, 44, 1, -43, -24, 30, 40, -9, -45, -15, 37, 35, -18, -45, -6, 42, 28, -27, -42, 4, 45, 19, -34, -38, 14, 45, 10, -40, -31, 23, 44, }, { 32, 10, -41, -28, 29, 40, -11, -45, -9, 41, 27, -30, -40, 12, 45, 8, -42, -26, 30, 39, -13, -45, -7, 42, 25, -31, -39, 14, 45, 6, -43, -24, 32, 38, -15, -45, -4, 43, 23, -33, -38, 16, 45, 3, -43, -22, 34, 37, -17, -45, -2, 44, 21, -34, -36, 18, 44, 1, -44, -20, 35, 36, -19, -44, }, { 32, 8, -43, -22, 35, 34, -23, -42, 9, 45, 7, -43, -21, 36, 34, -24, -42, 10, 45, 6, -43, -20, 36, 33, -25, -41, 11, 45, 4, -44, -19, 37, 32, -26, -41, 12, 45, 3, -44, -18, 38, 31, -27, -40, 13, 45, 2, -44, -17, 38, 30, -28, -40, 14, 45, 1, -44, -16, 39, 30, -29, -39, 15, 45, }, { 32, 6, -44, -16, 40, 26, -34, -34, 25, 40, -15, -44, 4, 45, 7, -44, -17, 39, 27, -33, -35, 24, 41, -14, -44, 3, 45, 8, -43, -18, 39, 28, -32, -36, 23, 41, -13, -45, 2, 45, 9, -43, -19, 38, 29, -31, -36, 22, 42, -12, -45, 1, 45, 10, -43, -20, 38, 30, -30, -37, 21, 42, -11, -45, }, { 32, 3, -45, -10, 43, 16, -41, -22, 38, 28, -34, -33, 29, 37, -23, -40, 17, 43, -11, -45, 4, 45, 2, -45, -9, 44, 15, -41, -21, 38, 27, -34, -32, 30, 36, -24, -40, 18, 43, -12, -44, 6, 45, 1, -45, -8, 44, 14, -42, -20, 39, 26, -35, -31, 30, 36, -25, -39, 19, 42, -13, -44, 7, 45, }, { 32, 1, -45, -3, 45, 6, -45, -8, 44, 10, -44, -12, 43, 14, -43, -16, 42, 18, -41, -20, 40, 22, -39, -24, 38, 26, -36, -28, 35, 30, -34, -31, 32, 33, -30, -34, 29, 36, -27, -37, 25, 38, -23, -39, 21, 40, -19, -41, 17, 42, -15, -43, 13, 44, -11, -44, 9, 45, -7, -45, 4, 45, -2, -45, }, { 32, -1, -45, 3, 45, -6, -45, 8, 44, -10, -44, 12, 43, -14, -43, 16, 42, -18, -41, 20, 40, -22, -39, 24, 38, -26, -36, 28, 35, -30, -34, 31, 32, -33, -30, 34, 29, -36, -27, 37, 25, -38, -23, 39, 21, -40, -19, 41, 17, -42, -15, 43, 13, -44, -11, 44, 9, -45, -7, 45, 4, -45, -2, 45, }, { 32, -3, -45, 10, 43, -16, -41, 22, 38, -28, -34, 33, 29, -37, -23, 40, 17, -43, -11, 45, 4, -45, 2, 45, -9, -44, 15, 41, -21, -38, 27, 34, -32, -30, 36, 24, -40, -18, 43, 12, -44, -6, 45, -1, -45, 8, 44, -14, -42, 20, 39, -26, -35, 31, 30, -36, -25, 39, 19, -42, -13, 44, 7, -45, }, { 32, -6, -44, 16, 40, -26, -34, 34, 25, -40, -15, 44, 4, -45, 7, 44, -17, -39, 27, 33, -35, -24, 41, 14, -44, -3, 45, -8, -43, 18, 39, -28, -32, 36, 23, -41, -13, 45, 2, -45, 9, 43, -19, -38, 29, 31, -36, -22, 42, 12, -45, -1, 45, -10, -43, 20, 38, -30, -30, 37, 21, -42, -11, 45, }, { 32, -8, -43, 22, 35, -34, -23, 42, 9, -45, 7, 43, -21, -36, 34, 24, -42, -10, 45, -6, -43, 20, 36, -33, -25, 41, 11, -45, 4, 44, -19, -37, 32, 26, -41, -12, 45, -3, -44, 18, 38, -31, -27, 40, 13, -45, 2, 44, -17, -38, 30, 28, -40, -14, 45, -1, -44, 16, 39, -30, -29, 39, 15, -45, }, { 32, -10, -41, 28, 29, -40, -11, 45, -9, -41, 27, 30, -40, -12, 45, -8, -42, 26, 30, -39, -13, 45, -7, -42, 25, 31, -39, -14, 45, -6, -43, 24, 32, -38, -15, 45, -4, -43, 23, 33, -38, -16, 45, -3, -43, 22, 34, -37, -17, 45, -2, -44, 21, 34, -36, -18, 44, -1, -44, 20, 35, -36, -19, 44, }, { 32, -12, -39, 33, 21, -44, 2, 43, -25, -30, 41, 8, -45, 16, 36, -36, -17, 45, -7, -41, 29, 26, 
-43, -3, 44, -20, -34, 38, 13, -45, 11, 39, -32, -22, 44, -1, -43, 24, 30, -40, -9, 45, -15, -37, 35, 18, -45, 6, 42, -28, -27, 42, 4, -45, 19, 34, -38, -14, 45, -10, -40, 31, 23, -44, }, { 32, -14, -36, 37, 13, -45, 15, 36, -38, -12, 45, -16, -35, 38, 11, -45, 17, 34, -39, -10, 45, -18, -34, 39, 9, -45, 19, 33, -40, -8, 45, -20, -32, 40, 7, -45, 21, 31, -41, -6, 44, -22, -30, 41, 4, -44, 23, 30, -42, -3, 44, -24, -29, 42, 2, -44, 25, 28, -43, -1, 43, -26, -27, 43, }, { 32, -16, -34, 40, 4, -44, 27, 24, -44, 8, 39, -36, -13, 45, -19, -31, 42, 1, -43, 30, 21, -45, 11, 37, -38, -10, 45, -22, -29, 43, -2, -41, 32, 18, -45, 14, 35, -39, -7, 44, -25, -26, 44, -6, -40, 34, 15, -45, 17, 33, -41, -3, 43, -28, -23, 45, -9, -38, 36, 12, -45, 20, 30, -42, }, { 32, -18, -30, 43, -4, -39, 36, 10, -44, 26, 23, -45, 13, 34, -41, -1, 42, -33, -15, 45, -21, -28, 44, -8, -38, 38, 7, -44, 29, 20, -45, 16, 32, -42, 2, 40, -35, -12, 45, -24, -25, 45, -11, -36, 40, 3, -43, 31, 17, -45, 19, 30, -43, 6, 39, -37, -9, 44, -27, -22, 45, -14, -34, 41, }, { 32, -20, -27, 45, -13, -33, 43, -6, -38, 39, 2, -41, 35, 10, -44, 30, 17, -45, 23, 24, -45, 16, 30, -44, 9, 36, -41, 1, 40, -37, -7, 43, -32, -14, 45, -26, -21, 45, -19, -28, 44, -12, -34, 42, -4, -38, 39, 3, -42, 34, 11, -44, 29, 18, -45, 22, 25, -45, 15, 31, -43, 8, 36, -40, }, { 32, -22, -23, 45, -21, -24, 45, -20, -25, 45, -19, -26, 45, -18, -27, 45, -17, -28, 45, -16, -29, 45, -15, -30, 44, -14, -30, 44, -13, -31, 44, -12, -32, 44, -11, -33, 43, -10, -34, 43, -9, -34, 43, -8, -35, 42, -7, -36, 42, -6, -36, 41, -4, -37, 41, -3, -38, 40, -2, -38, 40, -1, -39, 39, }, { 32, -24, -19, 45, -29, -14, 44, -33, -9, 42, -36, -3, 40, -39, 2, 37, -42, 8, 34, -44, 13, 30, -45, 18, 25, -45, 23, 20, -45, 28, 15, -44, 32, 10, -43, 36, 4, -40, 39, -1, -38, 41, -7, -34, 43, -12, -30, 45, -17, -26, 45, -22, -21, 45, -27, -16, 44, -31, -11, 43, -35, -6, 41, -38, }, { 32, -26, -15, 44, -35, -3, 39, -41, 9, 31, -45, 20, 21, -45, 30, 10, -42, 38, -2, -36, 43, -14, -27, 45, -25, -16, 44, -34, -4, 39, -41, 8, 32, -45, 19, 22, -45, 30, 11, -42, 38, -1, -36, 43, -13, -28, 45, -24, -17, 44, -34, -6, 40, -40, 7, 33, -44, 18, 23, -45, 29, 12, -43, 37, }, { 32, -28, -11, 41, -40, 8, 30, -45, 25, 14, -43, 38, -4, -33, 45, -22, -17, 44, -36, 1, 35, -44, 19, 20, -44, 34, 2, -37, 43, -16, -23, 45, -32, -6, 39, -42, 13, 26, -45, 30, 9, -40, 41, -10, -29, 45, -27, -12, 42, -39, 7, 31, -45, 24, 15, -43, 38, -3, -34, 45, -21, -18, 44, -36, }, { 32, -30, -7, 38, -43, 18, 19, -44, 38, -6, -30, 45, -29, -8, 39, -43, 17, 20, -44, 37, -4, -31, 45, -28, -9, 39, -43, 16, 21, -44, 36, -3, -32, 45, -27, -10, 40, -42, 15, 22, -44, 36, -2, -33, 45, -26, -11, 40, -42, 14, 23, -45, 35, -1, -34, 45, -25, -12, 41, -41, 13, 24, -45, 34, }, { 32, -31, -2, 34, -45, 28, 7, -37, 44, -24, -11, 39, -43, 20, 15, -41, 42, -16, -19, 43, -40, 12, 23, -44, 38, -8, -27, 45, -35, 3, 30, -45, 32, 1, -34, 45, -29, -6, 36, -45, 25, 10, -39, 44, -21, -14, 41, -42, 17, 18, -43, 40, -13, -22, 44, -38, 9, 26, -45, 36, -4, -30, 45, -33, }, { 32, -33, 2, 30, -45, 36, -7, -26, 44, -38, 11, 22, -43, 40, -15, -18, 42, -42, 19, 14, -40, 44, -23, -10, 38, -45, 27, 6, -35, 45, -30, -1, 32, -45, 34, -3, -29, 45, -36, 8, 25, -44, 39, -12, -21, 43, -41, 16, 17, -41, 43, -20, -13, 39, -44, 24, 9, -37, 45, -28, -4, 34, -45, 31, }, { 32, -34, 7, 24, -43, 41, -19, -12, 38, -45, 30, -1, -29, 45, -39, 14, 17, -40, 44, -26, -4, 33, -45, 36, -9, -22, 43, -42, 21, 10, -36, 45, -32, 3, 27, -44, 40, -16, -15, 39, -44, 28, 2, -31, 45, -37, 11, 20, -42, 43, 
-23, -8, 35, -45, 34, -6, -25, 44, -41, 18, 13, -38, 45, -30, }, { 32, -36, 11, 18, -40, 45, -30, 3, 25, -43, 43, -24, -4, 31, -45, 39, -17, -12, 36, -45, 35, -10, -19, 40, -44, 30, -2, -26, 43, -42, 23, 6, -32, 45, -39, 16, 13, -37, 45, -34, 9, 20, -41, 44, -29, 1, 27, -44, 42, -22, -7, 33, -45, 38, -15, -14, 38, -45, 34, -8, -21, 41, -44, 28, }, { 32, -37, 15, 12, -35, 45, -39, 18, 9, -33, 45, -40, 21, 6, -30, 44, -42, 24, 2, -28, 43, -43, 27, -1, -25, 42, -44, 30, -4, -22, 41, -45, 32, -8, -19, 39, -45, 34, -11, -16, 38, -45, 36, -14, -13, 36, -45, 38, -17, -10, 34, -45, 40, -20, -7, 31, -44, 41, -23, -3, 29, -44, 43, -26, }, { 32, -38, 19, 6, -29, 43, -44, 31, -9, -16, 36, -45, 40, -22, -2, 26, -42, 45, -34, 12, 13, -34, 45, -41, 25, -1, -23, 40, -45, 36, -15, -10, 32, -44, 43, -28, 4, 20, -39, 45, -38, 18, 7, -30, 43, -44, 30, -8, -17, 37, -45, 39, -21, -3, 27, -42, 44, -33, 11, 14, -35, 45, -41, 24, }, { 32, -39, 23, -1, -21, 38, -45, 40, -25, 3, 19, -37, 45, -41, 27, -6, -17, 36, -45, 42, -29, 8, 15, -34, 44, -43, 30, -10, -13, 33, -44, 44, -32, 12, 11, -31, 43, -44, 34, -14, -9, 30, -43, 45, -35, 16, 7, -28, 42, -45, 36, -18, -4, 26, -41, 45, -38, 20, 2, -24, 40, -45, 39, -22, }, { 32, -40, 27, -8, -13, 31, -43, 45, -38, 22, -2, -18, 35, -44, 44, -34, 17, 3, -23, 38, -45, 42, -30, 12, 9, -28, 41, -45, 40, -26, 7, 14, -32, 43, -45, 37, -21, 1, 19, -36, 44, -44, 34, -16, -4, 24, -39, 45, -42, 30, -11, -10, 29, -41, 45, -39, 25, -6, -15, 33, -43, 45, -36, 20, }, { 32, -41, 30, -14, -4, 22, -36, 44, -44, 37, -23, 6, 13, -30, 41, -45, 42, -31, 15, 3, -21, 36, -44, 45, -38, 24, -7, -12, 29, -40, 45, -42, 32, -16, -2, 20, -35, 44, -45, 38, -25, 8, 11, -28, 40, -45, 43, -33, 17, 1, -19, 34, -43, 45, -39, 26, -9, -10, 27, -39, 45, -43, 34, -18, }, { 32, -42, 34, -20, 4, 12, -27, 38, -44, 45, -39, 28, -13, -3, 19, -33, 42, -45, 43, -34, 21, -6, -11, 26, -38, 44, -45, 39, -29, 14, 2, -18, 32, -41, 45, -43, 35, -22, 7, 10, -25, 37, -44, 45, -40, 30, -15, -1, 17, -31, 41, -45, 43, -36, 23, -8, -9, 24, -36, 44, -45, 40, -30, 16, }, { 32, -43, 36, -26, 13, 1, -15, 28, -38, 44, -45, 42, -35, 24, -11, -3, 17, -30, 39, -44, 45, -41, 34, -22, 9, 6, -19, 31, -40, 45, -45, 40, -32, 20, -7, -8, 21, -33, 41, -45, 44, -39, 30, -18, 4, 10, -23, 34, -42, 45, -44, 38, -29, 16, -2, -12, 25, -36, 43, -45, 43, -37, 27, -14, }, { 32, -44, 39, -31, 21, -10, -2, 14, -25, 34, -41, 45, -45, 42, -36, 28, -17, 6, 7, -18, 29, -37, 43, -45, 44, -40, 34, -24, 13, -1, -11, 22, -32, 39, -44, 45, -43, 38, -30, 20, -9, -3, 15, -26, 35, -41, 45, -45, 42, -36, 27, -16, 4, 8, -19, 30, -38, 43, -45, 44, -40, 33, -23, 12, }, { 32, -44, 41, -36, 29, -20, 11, -1, -9, 18, -27, 34, -40, 44, -45, 45, -42, 37, -30, 22, -13, 3, 7, -16, 25, -33, 39, -43, 45, -45, 43, -38, 32, -24, 15, -6, -4, 14, -23, 31, -38, 42, -45, 45, -43, 39, -34, 26, -17, 8, 2, -12, 21, -30, 36, -41, 44, -45, 44, -40, 35, -28, 19, -10, }, { 32, -45, 43, -39, 35, -30, 23, -16, 9, -1, -7, 14, -21, 28, -34, 38, -42, 44, -45, 45, -43, 40, -36, 31, -25, 18, -11, 3, 4, -12, 19, -26, 32, -37, 41, -44, 45, -45, 44, -41, 38, -33, 27, -20, 13, -6, -2, 10, -17, 24, -30, 36, -40, 43, -45, 45, -44, 42, -39, 34, -29, 22, -15, 8, }, { 32, -45, 44, -42, 40, -37, 34, -30, 25, -20, 15, -10, 4, 1, -7, 12, -17, 22, -27, 31, -35, 38, -41, 43, -44, 45, -45, 45, -43, 41, -39, 36, -32, 28, -23, 18, -13, 8, -2, -3, 9, -14, 19, -24, 29, -33, 36, -39, 42, -44, 45, -45, 45, -44, 43, -40, 38, -34, 30, -26, 21, -16, 11, -6, }, { 32, -45, 45, -44, 43, -42, 41, -39, 38, -36, 34, -31, 29, -26, 
23, -20, 17, -14, 11, -8, 4, -1, -2, 6, -9, 12, -15, 18, -21, 24, -27, 30, -32, 34, -36, 38, -40, 41, -43, 44, -44, 45, -45, 45, -45, 45, -44, 43, -42, 40, -39, 37, -35, 33, -30, 28, -25, 22, -19, 16, -13, 10, -7, 3, }, { 32, -45, 45, -45, 45, -45, 45, -45, 44, -44, 44, -44, 43, -43, 43, -42, 42, -41, 41, -40, 40, -39, 39, -38, 38, -37, 36, -36, 35, -34, 34, -33, 32, -31, 30, -30, 29, -28, 27, -26, 25, -24, 23, -22, 21, -20, 19, -18, 17, -16, 15, -14, 13, -12, 11, -10, 9, -8, 7, -6, 4, -3, 2, -1, } }; libplacebo-v4.192.1/src/shaders/icc.c000066400000000000000000000301271417677245700173230ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "shaders.h" static cmsHPROFILE get_profile(pl_log log, cmsContext cms, struct pl_icc_color_space iccsp, struct pl_color_space *csp) { *csp = iccsp.color; // The input profile for the transformation is dependent on the video // primaries, transfer characteristics, and brightness levels const float lb = csp->hdr.min_luma / PL_COLOR_SDR_WHITE; const float lw = csp->hdr.max_luma / PL_COLOR_SDR_WHITE; const struct pl_raw_primaries *prim = pl_raw_primaries_get(csp->primaries); cmsCIExyY wp_xyY = { prim->white.x, prim->white.y, 1.0 }; cmsCIExyYTRIPLE prim_xyY = { .Red = { prim->red.x, prim->red.y, 1.0 }, .Green = { prim->green.x, prim->green.y, 1.0 }, .Blue = { prim->blue.x, prim->blue.y, 1.0 }, }; if (iccsp.profile.data) { pl_info(log, "Opening ICC profile.."); cmsHPROFILE prof = cmsOpenProfileFromMemTHR(cms, iccsp.profile.data, iccsp.profile.len); if (!prof) { pl_err(log, "Failed opening ICC profile, falling back to color struct"); goto fallback; } // Update contrast information with detected black point const int intent = PL_INTENT_RELATIVE_COLORIMETRIC; cmsCIEXYZ bp_XYZ; if (!cmsDetectBlackPoint(&bp_XYZ, prof, intent, 0)) return prof; // Map this XYZ value back into the (linear) source space cmsToneCurve *linear = cmsBuildGamma(cms, 1.0); cmsHPROFILE rev_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, (cmsToneCurve*[3]){linear, linear, linear}); cmsHPROFILE xyz_profile = cmsCreateXYZProfile(); cmsHTRANSFORM xyz2src = cmsCreateTransformTHR(cms, xyz_profile, TYPE_XYZ_DBL, rev_profile, TYPE_RGB_DBL, intent, 0); cmsFreeToneCurve(linear); cmsCloseProfile(rev_profile); cmsCloseProfile(xyz_profile); if (!xyz2src) return prof; double src_black[3] = {0}; cmsDoTransform(xyz2src, &bp_XYZ, src_black, 1); cmsDeleteTransform(xyz2src); // Convert to (relative) output luminance using RGB->XYZ matrix struct pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(&csp->hdr.prim); float min_luma = 0.0f; for (int i = 0; i < 3; i++) min_luma += rgb2xyz.m[1][i] * src_black[i]; csp->hdr.min_luma = min_luma * csp->hdr.max_luma; return prof; } // fall through fallback:; cmsToneCurve *tonecurve = NULL; switch (csp->transfer) { case PL_COLOR_TRC_LINEAR: tonecurve = cmsBuildGamma(cms, 1.0); break; case PL_COLOR_TRC_GAMMA18: tonecurve 
= cmsBuildParametricToneCurve(cms, 6, (double[4]) { 1.8f, powf(lw - lb, 1/1.8f), 0, lb }); break; case PL_COLOR_TRC_GAMMA20: tonecurve = cmsBuildParametricToneCurve(cms, 6, (double[4]) { 2.0f, powf(lw - lb, 1/2.0f), 0, lb }); break; case PL_COLOR_TRC_GAMMA24: tonecurve = cmsBuildParametricToneCurve(cms, 6, (double[4]) { 2.4f, powf(lw - lb, 1/2.4f), 0, lb }); break; case PL_COLOR_TRC_GAMMA26: tonecurve = cmsBuildParametricToneCurve(cms, 6, (double[4]) { 2.6f, powf(lw - lb, 1/2.6f), 0, lb }); break; case PL_COLOR_TRC_GAMMA28: tonecurve = cmsBuildParametricToneCurve(cms, 6, (double[4]) { 2.8f, powf(lw - lb, 1/2.8f), 0, lb }); break; case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_PQ: case PL_COLOR_TRC_HLG: case PL_COLOR_TRC_S_LOG1: case PL_COLOR_TRC_S_LOG2: case PL_COLOR_TRC_V_LOG: case PL_COLOR_TRC_GAMMA22: // Catch-all bucket for unimplemented/unknown TRCs csp->transfer = PL_COLOR_TRC_GAMMA22; tonecurve = cmsBuildParametricToneCurve(cms, 6, (double[4]) { 2.2f, powf(lw - lb, 1/2.2f), 0, lb }); break; case PL_COLOR_TRC_SRGB: { // Curve definition: // (aX + b)^y + e | X >= d // cX + f | X < d const float y = 2.4f; const float s = powf(lw - lb, 1/y); const float a = s / 1.055f; const float b = a * 0.055f; const float c = (lw - lb) / 12.92f; const float d = 0.04045f; tonecurve = cmsBuildParametricToneCurve(cms, 5, (double[7]) { y, a, b, c, d, lb, lb }); break; } case PL_COLOR_TRC_PRO_PHOTO: { // Curve definition: // (aX + b)^y + e | X >= d // cX + f | X < d const float y = 1.8f; const float s = powf(lw - lb, 1/y); const float c = (lw - lb) / 16; const float d = 0.03125f; tonecurve = cmsBuildParametricToneCurve(cms, 5, (double[7]){ y, s, 0, c, d, lb, lb }); break; } case PL_COLOR_TRC_BT_1886: { // Curve definition: // (aX + b)^y + c const float y = 2.4f; const float lby = powf(lb, 1/y); const float lwy = powf(lw, 1/y); tonecurve = cmsBuildParametricToneCurve(cms, 6, (double[4]){ y, lwy - lby, lby, 0 }); break; } case PL_COLOR_TRC_COUNT: pl_unreachable(); } if (!tonecurve) return NULL; cmsToneCurve *curves[3] = { tonecurve, tonecurve, tonecurve }; cmsHPROFILE ret = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, curves); cmsFreeToneCurve(tonecurve); return ret; } static void error_callback(cmsContext cms, cmsUInt32Number code, const char *msg) { pl_log log = cmsGetContextUserData(cms); pl_err(log, "lcms2: [%d] %s", (int) code, msg); } struct sh_icc_obj { pl_log log; struct pl_icc_params params; struct pl_icc_color_space src, dst; struct pl_icc_result result; pl_shader_obj lut_obj; bool updated; // to detect misuse of the API bool ok; ident_t lut; }; static void fill_icc(void *datap, const struct sh_lut_params *params) { struct sh_icc_obj *obj = params->priv; pl_assert(params->comps == 4); struct pl_icc_color_space src = obj->src; cmsHPROFILE srcp = NULL, dstp = NULL; cmsHTRANSFORM trafo = NULL; uint16_t *tmp = NULL; obj->ok = false; cmsContext cms = cmsCreateContext(NULL, (void *) obj->log); if (!cms) { PL_ERR(obj, "Failed creating LittleCMS context!"); goto error; } cmsSetLogErrorHandlerTHR(cms, error_callback); clock_t start = clock(); dstp = get_profile(obj->log, cms, obj->dst, &obj->result.dst_color); if (obj->params.use_display_contrast) { src.color.hdr.max_luma = obj->result.dst_color.hdr.max_luma; src.color.hdr.min_luma = obj->result.dst_color.hdr.min_luma; } srcp = get_profile(obj->log, cms, src, &obj->result.src_color); clock_t after_profiles = clock(); pl_log_cpu_time(obj->log, start, after_profiles, "opening ICC profiles"); if (!srcp || !dstp) goto error; uint32_t flags =
cmsFLAGS_HIGHRESPRECALC | cmsFLAGS_BLACKPOINTCOMPENSATION | cmsFLAGS_NOCACHE; trafo = cmsCreateTransformTHR(cms, srcp, TYPE_RGB_16, dstp, TYPE_RGB_16, obj->params.intent, flags); clock_t after_transform = clock(); pl_log_cpu_time(obj->log, after_profiles, after_transform, "creating ICC transform"); if (!trafo) { PL_ERR(obj, "Failed creating CMS transform!"); goto error; } int s_r = params->width, s_g = params->height, s_b = params->depth; pl_assert(s_r > 1 && s_g > 1 && s_b > 1); tmp = pl_alloc(NULL, 2 * s_r * 3 * sizeof(tmp[0])); uint16_t *out = tmp + s_r * 3; for (int b = 0; b < s_b; b++) { for (int g = 0; g < s_g; g++) { // Transform a single line of the output buffer for (int r = 0; r < s_r; r++) { tmp[r * 3 + 0] = r * 65535 / (s_r - 1); tmp[r * 3 + 1] = g * 65535 / (s_g - 1); tmp[r * 3 + 2] = b * 65535 / (s_b - 1); } cmsDoTransform(trafo, tmp, out, s_r); // Write this line into the right output position size_t offset = (b * s_g + g) * s_r * 4; float *data = ((float *) datap) + offset; for (int r = 0; r < s_r; r++) { data[r * 4 + 0] = out[r * 3 + 0] / 65535.0; data[r * 4 + 1] = out[r * 3 + 1] / 65535.0; data[r * 4 + 2] = out[r * 3 + 2] / 65535.0; data[r * 4 + 3] = 1.0; } } } pl_log_cpu_time(obj->log, after_transform, clock(), "generating ICC 3DLUT"); obj->ok = true; // fall through error: if (trafo) cmsDeleteTransform(trafo); if (srcp) cmsCloseProfile(srcp); if (dstp) cmsCloseProfile(dstp); if (cms) cmsDeleteContext(cms); pl_free_ptr(&tmp); } static void sh_icc_uninit(pl_gpu gpu, void *ptr) { struct sh_icc_obj *obj = ptr; pl_shader_obj_destroy(&obj->lut_obj); *obj = (struct sh_icc_obj) {0}; } static bool icc_csp_eq(const struct pl_icc_color_space *a, const struct pl_icc_color_space *b) { return pl_icc_profile_equal(&a->profile, &b->profile) && pl_color_space_equal(&a->color, &b->color); } bool pl_icc_update(pl_shader sh, const struct pl_icc_color_space *srcp, const struct pl_icc_color_space *dstp, pl_shader_obj *icc, struct pl_icc_result *out, const struct pl_icc_params *params) { params = PL_DEF(params, &pl_icc_default_params); size_t s_r = PL_DEF(params->size_r, 64), s_g = PL_DEF(params->size_g, 64), s_b = PL_DEF(params->size_b, 64); struct sh_icc_obj *obj; obj = SH_OBJ(sh, icc, PL_SHADER_OBJ_ICC, struct sh_icc_obj, sh_icc_uninit); if (!obj) return false; struct pl_icc_color_space src = *srcp, dst = *dstp; pl_color_space_infer(&src.color); pl_color_space_infer_ref(&dst.color, &src.color); bool changed = !icc_csp_eq(&obj->src, &src) || !icc_csp_eq(&obj->dst, &dst) || memcmp(&obj->params, params, sizeof(*params)); // Update the object, since we need this information from `fill_icc` obj->log = sh->log; obj->params = *params; obj->src = src; obj->dst = dst; obj->lut = sh_lut(sh, sh_lut_params( .object = &obj->lut_obj, .type = PL_VAR_FLOAT, .width = s_r, .height = s_g, .depth = s_b, .comps = 4, .linear = true, .update = changed, .fill = fill_icc, .priv = obj, )); if (!obj->lut || !obj->ok) return false; obj->updated = true; *out = obj->result; return true; } void pl_icc_apply(pl_shader sh, pl_shader_obj *icc) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; struct sh_icc_obj *obj; obj = SH_OBJ(sh, icc, PL_SHADER_OBJ_ICC, struct sh_icc_obj, sh_icc_uninit); if (!obj || !obj->lut || !obj->updated || !obj->ok) { SH_FAIL(sh, "pl_icc_apply called without prior pl_icc_update?"); return; } sh_describe(sh, "ICC 3DLUT"); GLSL("// pl_icc_apply \n" "color.rgb = %s(color.rgb).rgb; \n", obj->lut); obj->updated = false; } const struct pl_icc_params pl_icc_default_params = { PL_ICC_DEFAULTS }; 
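/* Illustrative usage sketch (not part of the original source; the shader `sh`,
 * the `src`/`dst` ICC color spaces and the lifetime of `icc_state` are
 * assumptions supplied by the caller, and error handling is omitted):
 *
 *     pl_shader_obj icc_state = NULL;
 *     struct pl_icc_result res;
 *     // NULL params falls back to pl_icc_default_params
 *     if (pl_icc_update(sh, &src, &dst, &icc_state, &res, NULL))
 *         pl_icc_apply(sh, &icc_state);
 *     // ... once the state object is no longer needed:
 *     pl_shader_obj_destroy(&icc_state);
 */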
libplacebo-v4.192.1/src/shaders/lut.c000066400000000000000000000207401417677245700173710ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "shaders.h" static inline bool isnumeric(char c) { return (c >= '0' && c <= '9') || c == '-'; } void pl_lut_free(struct pl_custom_lut **lut) { pl_free_ptr(lut); } struct pl_custom_lut *pl_lut_parse_cube(pl_log log, const char *cstr, size_t cstr_len) { struct pl_custom_lut *lut = pl_zalloc_ptr(NULL, lut); pl_str str = (pl_str) { (uint8_t *) cstr, cstr_len }; lut->signature = pl_str_hash(str); int entries = 0; float min[3] = { 0.0, 0.0, 0.0 }; float max[3] = { 1.0, 1.0, 1.0 }; // Parse header while (str.len && !isnumeric(str.buf[0])) { pl_str line = pl_str_strip(pl_str_getline(str, &str)); if (!line.len) continue; // skip empty line if (pl_str_eatstart0(&line, "TITLE")) { pl_info(log, "Loading LUT: %.*s", PL_STR_FMT(pl_str_strip(line))); continue; } if (pl_str_eatstart0(&line, "LUT_3D_SIZE")) { line = pl_str_strip(line); int size; if (!pl_str_parse_int(line, &size)) { pl_err(log, "Failed parsing dimension '%.*s'", PL_STR_FMT(line)); goto error; } if (size <= 0 || size > 1024) { pl_err(log, "Invalid 3DLUT size: %dx%d%x", size, size, size); goto error; } lut->size[0] = lut->size[1] = lut->size[2] = size; entries = size * size * size; continue; } if (pl_str_eatstart0(&line, "LUT_1D_SIZE")) { line = pl_str_strip(line); int size; if (!pl_str_parse_int(line, &size)) { pl_err(log, "Failed parsing dimension '%.*s'", PL_STR_FMT(line)); goto error; } if (size <= 0 || size > 65536) { pl_err(log, "Invalid 1DLUT size: %d", size); goto error; } lut->size[0] = size; lut->size[1] = lut->size[2] = 0; entries = size; continue; } if (pl_str_eatstart0(&line, "DOMAIN_MIN")) { line = pl_str_strip(line); if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &min[0]) || !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &min[1]) || !pl_str_parse_float(line, &min[2])) { pl_err(log, "Failed parsing domain: '%.*s'", PL_STR_FMT(line)); goto error; } continue; } if (pl_str_eatstart0(&line, "DOMAIN_MAX")) { line = pl_str_strip(line); if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &max[0]) || !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &max[1]) || !pl_str_parse_float(line, &max[2])) { pl_err(log, "Failed parsing domain: '%.*s'", PL_STR_FMT(line)); goto error; } continue; } if (pl_str_eatstart0(&line, "#")) { pl_debug(log, "Unhandled .cube comment: %.*s", PL_STR_FMT(pl_str_strip(line))); continue; } pl_warn(log, "Unhandled .cube line: %.*s", PL_STR_FMT(pl_str_strip(line))); } if (!entries) { pl_err(log, "Missing LUT size specification?"); goto error; } for (int i = 0; i < 3; i++) { if (max[i] - min[i] < 1e-6) { pl_err(log, "Invalid domain range: [%f, %f]", min[i], max[i]); goto error; } } float *data = pl_alloc(lut, sizeof(float[3]) * entries); lut->data = data; // Parse LUT body clock_t start 
= clock(); for (int n = 0; n < entries; n++) { for (int c = 0; c < 3; c++) { static const char * const digits = "0123456789.-+e"; // Extract valid digit sequence size_t len = pl_strspn(str, digits); pl_str entry = (pl_str) { str.buf, len }; str.buf += len; str.len -= len; if (!entry.len) { if (!str.len) { pl_err(log, "Failed parsing LUT: Unexpected EOF, expected " "%d entries, got %d", entries * 3, n * 3 + c + 1); } else { pl_err(log, "Failed parsing LUT: Unexpected '%c', expected " "digit", str.buf[0]); } goto error; } float num; if (!pl_str_parse_float(entry, &num)) { pl_err(log, "Failed parsing float value '%.*s'", PL_STR_FMT(entry)); goto error; } // Rescale to range 0.0 - 1.0 *data++ = (num - min[c]) / (max[c] - min[c]); // Skip whitespace between digits str = pl_str_strip(str); } } str = pl_str_strip(str); if (str.len) pl_warn(log, "Extra data after LUT?... ignoring '%c'", str.buf[0]); pl_log_cpu_time(log, start, clock(), "parsing .cube LUT"); return lut; error: pl_free(lut); return NULL; } static void fill_lut(void *datap, const struct sh_lut_params *params) { const struct pl_custom_lut *lut = params->priv; int dim_r = params->width; int dim_g = PL_DEF(params->height, 1); int dim_b = PL_DEF(params->depth, 1); float *data = datap; for (int b = 0; b < dim_b; b++) { for (int g = 0; g < dim_g; g++) { for (int r = 0; r < dim_r; r++) { size_t offset = (b * dim_g + g) * dim_r + r; const float *src = &lut->data[offset * 3]; float *dst = &data[offset * 4]; dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = 0.0f; } } } } void pl_shader_custom_lut(pl_shader sh, const struct pl_custom_lut *lut, pl_shader_obj *lut_state) { if (!lut) return; int dims; if (lut->size[0] > 0 && lut->size[1] > 0 && lut->size[2] > 0) { dims = 3; } else if (lut->size[0] > 0 && !lut->size[1] && !lut->size[2]) { dims = 1; } else { SH_FAIL(sh, "Invalid dimensions %dx%dx%d for pl_custom_lut, must be 1D " "or 3D!", lut->size[0], lut->size[1], lut->size[2]); return; } if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; ident_t fun = sh_lut(sh, sh_lut_params( .object = lut_state, .type = PL_VAR_FLOAT, .width = lut->size[0], .height = lut->size[1], .depth = lut->size[2], .comps = 4, // for better texel alignment .linear = true, .signature = lut->signature, .fill = fill_lut, .priv = (void *) lut, )); if (!fun) { SH_FAIL(sh, "pl_shader_custom_lut: failed generating LUT object"); return; } GLSL("// pl_shader_custom_lut \n"); static const struct pl_matrix3x3 zero = {0}; if (memcmp(&lut->shaper_in, &zero, sizeof(zero)) != 0) { GLSL("color.rgb = %s * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("shaper_in"), .data = PL_TRANSPOSE_3X3(lut->shaper_in.m), })); } switch (dims) { case 1: sh_describe(sh, "custom 1DLUT"); GLSL("color.rgb = vec3(%s(color.r).r, %s(color.g).g, %s(color.b).b); \n", fun, fun, fun); break; case 3: sh_describe(sh, "custom 3DLUT"); GLSL("color.rgb = %s(color.rgb).rgb; \n", fun); break; } if (memcmp(&lut->shaper_out, &zero, sizeof(zero)) != 0) { GLSL("color.rgb = %s * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("shaper_out"), .data = PL_TRANSPOSE_3X3(lut->shaper_out.m), })); } } libplacebo-v4.192.1/src/shaders/sampling.c000066400000000000000000001061551417677245700204040ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "shaders.h" const struct pl_deband_params pl_deband_default_params = { PL_DEBAND_DEFAULTS }; static inline struct pl_tex_params src_params(const struct pl_sample_src *src) { if (src->tex) return src->tex->params; return (struct pl_tex_params) { .w = src->tex_w, .h = src->tex_h, }; } enum filter { NEAREST = PL_TEX_SAMPLE_NEAREST, LINEAR = PL_TEX_SAMPLE_LINEAR, BEST, FASTEST, }; // Helper function to compute the src/dst sizes and upscaling ratios static bool setup_src(pl_shader sh, const struct pl_sample_src *src, ident_t *src_tex, ident_t *pos, ident_t *size, ident_t *pt, float *ratio_x, float *ratio_y, uint8_t *comp_mask, float *scale, bool resizeable, const char **fn, enum filter filter) { enum pl_shader_sig sig; float src_w, src_h; enum pl_tex_sample_mode sample_mode; if (src->tex) { pl_fmt fmt = src->tex->params.format; bool can_linear = fmt->caps & PL_FMT_CAP_LINEAR; pl_assert(pl_tex_params_dimension(src->tex->params) == 2); sig = PL_SHADER_SIG_NONE; src_w = pl_rect_w(src->rect); src_h = pl_rect_h(src->rect); switch (filter) { case FASTEST: case NEAREST: sample_mode = PL_TEX_SAMPLE_NEAREST; break; case LINEAR: if (!can_linear) { SH_FAIL(sh, "Trying to use a shader that requires linear " "sampling with a texture whose format (%s) does not " "support PL_FMT_CAP_LINEAR", fmt->name); return false; } sample_mode = PL_TEX_SAMPLE_LINEAR; break; case BEST: sample_mode = can_linear ? 
PL_TEX_SAMPLE_LINEAR : PL_TEX_SAMPLE_NEAREST; break; } } else { pl_assert(src->tex_w && src->tex_h); sig = PL_SHADER_SIG_SAMPLER; src_w = src->sampled_w; src_h = src->sampled_h; if (filter == BEST || filter == FASTEST) { sample_mode = src->mode; } else { sample_mode = (enum pl_tex_sample_mode) filter; if (sample_mode != src->mode) { SH_FAIL(sh, "Trying to use a shader that requires a different " "filter mode than the external sampler."); return false; } } } src_w = PL_DEF(src_w, src_params(src).w); src_h = PL_DEF(src_h, src_params(src).h); pl_assert(src_w && src_h); int out_w = PL_DEF(src->new_w, roundf(fabs(src_w))); int out_h = PL_DEF(src->new_h, roundf(fabs(src_h))); pl_assert(out_w && out_h); if (ratio_x) *ratio_x = out_w / fabs(src_w); if (ratio_y) *ratio_y = out_h / fabs(src_h); if (scale) *scale = PL_DEF(src->scale, 1.0); if (comp_mask) { uint8_t tex_mask = 0x0Fu; if (src->tex) { // Mask containing only the number of components in the texture tex_mask = (1 << src->tex->params.format->num_components) - 1; } uint8_t src_mask = src->component_mask; if (!src_mask) src_mask = (1 << PL_DEF(src->components, 4)) - 1; // Only actually sample components that are both requested and // available in the texture being sampled *comp_mask = tex_mask & src_mask; } if (resizeable) out_w = out_h = 0; if (!sh_require(sh, sig, out_w, out_h)) return false; if (src->tex) { struct pl_rect2df rect = { .x0 = src->rect.x0, .y0 = src->rect.y0, .x1 = src->rect.x0 + src_w, .y1 = src->rect.y0 + src_h, }; if (fn) *fn = sh_tex_fn(sh, src->tex->params); *src_tex = sh_bind(sh, src->tex, src->address_mode, sample_mode, "src_tex", &rect, pos, size, pt); } else { if (size) { *size = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_size"), .data = &(float[2]) { src->tex_w, src->tex_h }, }); } if (pt) { float sx = 1.0 / src->tex_w, sy = 1.0 / src->tex_h; if (src->sampler == PL_SAMPLER_RECT) sx = sy = 1.0; *pt = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_pt"), .data = &(float[2]) { sx, sy }, }); } if (fn) *fn = sh_tex_fn(sh, (struct pl_tex_params) { .w = 1, .h = 1 }); // 2D sh->sampler_type = src->sampler; pl_assert(src->format); switch (src->format) { case PL_FMT_UNKNOWN: case PL_FMT_FLOAT: case PL_FMT_UNORM: case PL_FMT_SNORM: sh->sampler_prefix = ' '; break; case PL_FMT_UINT: sh->sampler_prefix = 'u'; break; case PL_FMT_SINT: sh->sampler_prefix = 's'; break; case PL_FMT_TYPE_COUNT: pl_unreachable(); } *src_tex = "src_tex"; *pos = "tex_coord"; } return true; } void pl_shader_deband(pl_shader sh, const struct pl_sample_src *src, const struct pl_deband_params *params) { float scale; ident_t tex, pos, pt; const char *fn; if (!setup_src(sh, src, &tex, &pos, NULL, &pt, NULL, NULL, NULL, &scale, false, &fn, LINEAR)) return; params = PL_DEF(params, &pl_deband_default_params); sh_describe(sh, "debanding"); GLSL("vec4 color; \n" "// pl_shader_deband \n" "{ \n"); ident_t prng, state; prng = sh_prng(sh, true, &state); GLSL("vec2 pos = %s; \n" "vec4 avg, diff; \n" "color = %s(%s, pos); \n", pos, fn, tex); if (params->iterations > 0) { // Helper function: Compute a stochastic approximation of the avg color // around a pixel, given a specified radius ident_t average = sh_fresh(sh, "average"); GLSLH("vec4 %s(vec2 pos, float range, inout prng_t %s) {\n" // Compute a random angle and distance " vec2 dd = %s.xy * vec2(range, %f); \n" " vec2 o = dd.x * vec2(cos(dd.y), sin(dd.y)); \n" // Sample at quarter-turn intervals around the source pixel " vec4 sum = vec4(0.0); \n" " sum += %s(%s, pos + %s * vec2( o.x, 
o.y)); \n" " sum += %s(%s, pos + %s * vec2(-o.x, o.y)); \n" " sum += %s(%s, pos + %s * vec2(-o.x, -o.y)); \n" " sum += %s(%s, pos + %s * vec2( o.x, -o.y)); \n" // Return the (normalized) average " return 0.25 * sum; \n" "}\n", average, state, prng, M_PI * 2, fn, tex, pt, fn, tex, pt, fn, tex, pt, fn, tex, pt); ident_t radius = sh_const_float(sh, "radius", params->radius); ident_t threshold = sh_const_float(sh, "threshold", params->threshold / (1000 * scale)); // For each iteration, compute the average at a given distance and // pick it instead of the color if the difference is below the threshold. for (int i = 1; i <= params->iterations; i++) { GLSL("avg = %s(pos, %d.0 * %s, %s); \n" "diff = abs(color - avg); \n" "color = mix(avg, color, %s(greaterThan(diff, vec4(%s / %d.0)))); \n", average, i, radius, state, sh_bvec(sh, 4), threshold, i); } } GLSL("color *= vec4(%s);\n", SH_FLOAT(scale)); // Add some random noise to smooth out residual differences if (params->grain > 0) { GLSL( "color.rgb += %s * (%s - vec3(0.5)); \n", SH_FLOAT(params->grain / 1000.0), prng); } GLSL("}\n"); } bool pl_shader_sample_direct(pl_shader sh, const struct pl_sample_src *src) { float scale; ident_t tex, pos; const char *fn; if (!setup_src(sh, src, &tex, &pos, NULL, NULL, NULL, NULL, NULL, &scale, true, &fn, BEST)) return false; GLSL("// pl_shader_sample_direct \n" "vec4 color = vec4(%s) * %s(%s, %s); \n", SH_FLOAT(scale), fn, tex, pos); return true; } bool pl_shader_sample_nearest(pl_shader sh, const struct pl_sample_src *src) { float scale; ident_t tex, pos; const char *fn; if (!setup_src(sh, src, &tex, &pos, NULL, NULL, NULL, NULL, NULL, &scale, true, &fn, NEAREST)) return false; sh_describe(sh, "nearest"); GLSL("// pl_shader_sample_nearest \n" "vec4 color = vec4(%s) * %s(%s, %s); \n", SH_FLOAT(scale), fn, tex, pos); return true; } bool pl_shader_sample_bilinear(pl_shader sh, const struct pl_sample_src *src) { float scale; ident_t tex, pos; const char *fn; if (!setup_src(sh, src, &tex, &pos, NULL, NULL, NULL, NULL, NULL, &scale, true, &fn, LINEAR)) return false; sh_describe(sh, "bilinear"); GLSL("// pl_shader_sample_bilinear \n" "vec4 color = vec4(%s) * %s(%s, %s); \n", SH_FLOAT(scale), fn, tex, pos); return true; } static void bicubic_calcweights(pl_shader sh, const char *t, const char *s) { // Explanation of how bicubic scaling with only 4 texel fetches is done: // http://www.mate.tue.nl/mate/pdfs/10318.pdf // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' GLSL("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s \n" " + vec4(1, 0, -0.5, 0.5); \n" "%s = %s * %s + vec4(0.0, 0.0, -0.5, 0.5); \n" "%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666); \n" "%s.xy /= %s.zw; \n" "%s.xy += vec2(1.0 + %s, 1.0 - %s); \n", t, s, t, t, s, t, t, s, t, t, t, s, s); } bool pl_shader_sample_bicubic(pl_shader sh, const struct pl_sample_src *src) { ident_t tex, pos, size, pt; float rx, ry, scale; const char *fn; if (!setup_src(sh, src, &tex, &pos, &size, &pt, &rx, &ry, NULL, &scale, true, &fn, LINEAR)) return false; if (rx < 1 || ry < 1) { PL_TRACE(sh, "Using fast bicubic sampling when downscaling. 
This " "will most likely result in nasty aliasing!"); } sh_describe(sh, "bicubic"); GLSL("// pl_shader_sample_bicubic \n" "vec4 color; \n" "{ \n" "vec2 pos = %s; \n" "vec2 pt = %s; \n" "vec2 size = %s; \n" "vec2 fcoord = fract(pos * size + vec2(0.5)); \n", pos, pt, size); bicubic_calcweights(sh, "parmx", "fcoord.x"); bicubic_calcweights(sh, "parmy", "fcoord.y"); GLSL("vec4 cdelta; \n" "cdelta.xz = parmx.rg * vec2(-pt.x, pt.x); \n" "cdelta.yw = parmy.rg * vec2(-pt.y, pt.y); \n" // first y-interpolation "vec4 ar = %s(%s, pos + cdelta.xy); \n" "vec4 ag = %s(%s, pos + cdelta.xw); \n" "vec4 ab = mix(ag, ar, parmy.b); \n" // second y-interpolation "vec4 br = %s(%s, pos + cdelta.zy); \n" "vec4 bg = %s(%s, pos + cdelta.zw); \n" "vec4 aa = mix(bg, br, parmy.b); \n" // x-interpolation "color = vec4(%s) * mix(aa, ab, parmx.b); \n" "} \n", fn, tex, fn, tex, fn, tex, fn, tex, SH_FLOAT(scale)); return true; } bool pl_shader_sample_oversample(pl_shader sh, const struct pl_sample_src *src, float threshold) { ident_t tex, pos, size, pt; float rx, ry, scale; const char *fn; if (!setup_src(sh, src, &tex, &pos, &size, &pt, &rx, &ry, NULL, &scale, true, &fn, LINEAR)) return false; ident_t ratio = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("ratio"), .data = &(float[2]) { rx, ry }, }); // Round the position to the nearest pixel sh_describe(sh, "oversample"); GLSL("// pl_shader_sample_oversample \n" "vec4 color; \n" "{ \n" "vec2 pt = %s; \n" "vec2 pos = %s - vec2(0.5) * pt; \n" "vec2 fcoord = fract(pos * %s - vec2(0.5)); \n" "vec2 coeff = fcoord * %s; \n", pt, pos, size, ratio); if (threshold > 0.0) { threshold = PL_MIN(threshold, 1.0); ident_t thresh = sh_const_float(sh, "threshold", threshold); GLSL("coeff = (coeff - %s) / (1.0 - 2.0 * %s); \n", thresh, thresh); } // Compute the right output blend of colors GLSL("coeff = clamp(coeff, 0.0, 1.0); \n" "pos += (coeff - fcoord) * pt; \n" "color = vec4(%s) * %s(%s, pos); \n" "} \n", SH_FLOAT(scale), fn, tex); return true; } static bool filter_compat(pl_filter filter, float inv_scale, int lut_entries, float cutoff, const struct pl_filter_config *params) { if (!filter) return false; if (filter->params.lut_entries != lut_entries) return false; if (fabs(filter->params.filter_scale - inv_scale) > 1e-3) return false; if (filter->params.cutoff != cutoff) return false; return pl_filter_config_eq(&filter->params.config, params); } // Subroutine for computing and adding an individual texel contribution // If `in` is NULL, samples directly // If `in` is set, takes the pixel from inX[idx] where X is the component, // `in` is the given identifier, and `idx` must be defined by the caller static void polar_sample(pl_shader sh, pl_filter filter, const char *fn, ident_t tex, ident_t lut, ident_t cutoff, ident_t radius, int x, int y, uint8_t comp_mask, ident_t in) { // Since we can't know the subpixel position in advance, assume a // worst case scenario int yy = y > 0 ? y-1 : y; int xx = x > 0 ? 
x-1 : x; float dmax = sqrt(xx*xx + yy*yy); // Skip samples definitely outside the radius if (dmax >= filter->radius_cutoff) return; GLSL("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y); // Check for samples that might be skippable bool maybe_skippable = dmax >= filter->radius_cutoff - M_SQRT2; if (maybe_skippable) GLSL("if (d < %s) {\n", cutoff); // Get the weight for this pixel GLSL("w = %s(d * 1.0/%s); \n" "wsum += w; \n", lut, radius); if (in) { for (uint8_t comps = comp_mask; comps;) { uint8_t c = __builtin_ctz(comps); GLSL("color[%d] += w * %s%d[idx]; \n", c, in, c); comps &= ~(1 << c); } } else { GLSL("c = %s(%s, base + pt * vec2(%d.0, %d.0)); \n", fn, tex, x, y); for (uint8_t comps = comp_mask; comps;) { uint8_t c = __builtin_ctz(comps); GLSL("color[%d] += w * c[%d]; \n", c, c); comps &= ~(1 << c); } } if (maybe_skippable) GLSL("}\n"); } struct sh_sampler_obj { pl_filter filter; pl_shader_obj lut; pl_shader_obj pass2; // for pl_shader_sample_ortho }; static void sh_sampler_uninit(pl_gpu gpu, void *ptr) { struct sh_sampler_obj *obj = ptr; pl_shader_obj_destroy(&obj->lut); pl_shader_obj_destroy(&obj->pass2); pl_filter_free(&obj->filter); *obj = (struct sh_sampler_obj) {0}; } static void fill_polar_lut(void *data, const struct sh_lut_params *params) { const struct sh_sampler_obj *obj = params->priv; pl_filter filt = obj->filter; pl_assert(params->width == filt->params.lut_entries && params->comps == 1); memcpy(data, filt->weights, params->width * sizeof(float)); } bool pl_shader_sample_polar(pl_shader sh, const struct pl_sample_src *src, const struct pl_sample_filter_params *params) { pl_assert(params); if (!params->filter.polar) { SH_FAIL(sh, "Trying to use polar sampling with a non-polar filter?"); return false; } bool has_compute = sh_glsl(sh).compute && !params->no_compute; has_compute &= sh_glsl(sh).version >= 130; // needed for round() if (!src->tex && has_compute) { // FIXME: Could maybe solve this by communicating the wbase from // invocation 0 to the rest of the workgroup using shmem, which would // also allow us to avoid the use of the hacky %s_map below. PL_WARN(sh, "Combining pl_shader_sample_polar with the sampler2D " "interface prevents the use of compute shaders, which is a " "potentially massive performance hit. If you're sure you want " "this, set `params.no_compute` to suppress this warning."); has_compute = false; } bool flipped = src->rect.x0 > src->rect.x1 || src->rect.y0 > src->rect.y1; if (flipped && has_compute) { // FIXME: I'm sure this case could actually be supported with some // extra math in the positional calculations, should implement it PL_WARN(sh, "Trying to use a flipped src.rect with polar sampling! " "This prevents the use of compute shaders, which is a " "potentially massive performance hit. 
If you're really sure you " "want this, set `params.no_compute` to suppress this warning."); has_compute = false; } uint8_t comp_mask; float rx, ry, scale; ident_t src_tex, pos, size, pt; const char *fn; if (!setup_src(sh, src, &src_tex, &pos, &size, &pt, &rx, &ry, &comp_mask, &scale, false, &fn, FASTEST)) return false; struct sh_sampler_obj *obj; obj = SH_OBJ(sh, params->lut, PL_SHADER_OBJ_SAMPLER, struct sh_sampler_obj, sh_sampler_uninit); if (!obj) return false; float inv_scale = 1.0 / PL_MIN(rx, ry); inv_scale = PL_MAX(inv_scale, 1.0); if (params->no_widening) inv_scale = 1.0; int lut_entries = PL_DEF(params->lut_entries, 64); float cutoff = PL_DEF(params->cutoff, 0.001); bool update = !filter_compat(obj->filter, inv_scale, lut_entries, cutoff, ¶ms->filter); if (update) { pl_filter_free(&obj->filter); obj->filter = pl_filter_generate(sh->log, pl_filter_params( .config = params->filter, .lut_entries = lut_entries, .filter_scale = inv_scale, .cutoff = cutoff, )); if (!obj->filter) { // This should never happen, but just in case .. SH_FAIL(sh, "Failed initializing polar filter!"); return false; } } sh_describe(sh, "polar scaling"); GLSL("// pl_shader_sample_polar \n" "vec4 color = vec4(0.0); \n" "{ \n" "vec2 pos = %s, size = %s, pt = %s; \n" "vec2 fcoord = fract(pos * size - vec2(0.5)); \n" "vec2 base = pos - pt * fcoord; \n" "vec2 center = base + pt * vec2(0.5); \n" "float w, d, wsum = 0.0; \n" "int idx; \n" "vec4 c; \n", pos, size, pt); int bound = ceil(obj->filter->radius_cutoff); int offset = bound - 1; // padding top/left int padding = offset + bound; // total padding // Determined experimentally on modern AMD and Nvidia hardware. 32 is a // good tradeoff for the horizontal work group size. Apart from that, // just use as many threads as possible. const int bw = 32, bh = sh_glsl(sh).max_group_threads / bw; // We need to sample everything from base_min to base_max, so make sure // we have enough room in shmem int iw = (int) ceil(bw / rx) + padding + 1, ih = (int) ceil(bh / ry) + padding + 1; ident_t in = NULL; int num_comps = __builtin_popcount(comp_mask); int shmem_req = iw * ih * num_comps * sizeof(float); bool is_compute = has_compute && sh_try_compute(sh, bw, bh, false, shmem_req); // For compute shaders, which read the input texels primarily from shmem, // using a texture-based LUT is better. For the fragment shader fallback // code, which is primarily texture bound, the extra cost of LUT // interpolation is worth the reduction in texel fetches. ident_t lut = sh_lut(sh, sh_lut_params( .object = &obj->lut, .method = is_compute ? 
SH_LUT_TEXTURE : SH_LUT_AUTO, .type = PL_VAR_FLOAT, .width = lut_entries, .comps = 1, .linear = true, .update = update, .fill = fill_polar_lut, .priv = obj, )); if (!lut) { SH_FAIL(sh, "Failed initializing polar LUT!"); return false; } ident_t cutoff_c = sh_const_float(sh, "radius_cutoff", obj->filter->radius_cutoff); ident_t radius_c = sh_const_float(sh, "radius", obj->filter->radius); if (is_compute) { // Compute shader kernel GLSL("vec2 wpos = %s_map(gl_WorkGroupID * gl_WorkGroupSize); \n" "vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5)); \n" "ivec2 rel = ivec2(round((base - wbase) * size)); \n", pos); ident_t iw_c = sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_SINT, .compile_time = true, .name ="iw", .data = &iw, }); ident_t ih_c = sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_SINT, .compile_time = true, .name = "ih", .data = &ih, }); // Load all relevant texels into shmem GLSL("for (int y = int(gl_LocalInvocationID.y); y < %s; y += %d) { \n" "for (int x = int(gl_LocalInvocationID.x); x < %s; x += %d) { \n" "c = %s(%s, wbase + pt * vec2(x - %d, y - %d)); \n", ih_c, bh, iw_c, bw, fn, src_tex, offset, offset); in = sh_fresh(sh, "in"); for (uint8_t comps = comp_mask; comps;) { uint8_t c = __builtin_ctz(comps); GLSLH("shared float %s%d[%s * %s]; \n", in, c, ih_c, iw_c); GLSL("%s%d[%s * y + x] = c[%d]; \n", in, c, iw_c, c); comps &= ~(1 << c); } GLSL("}} \n" "barrier(); \n"); // Dispatch the actual samples for (int y = 1 - bound; y <= bound; y++) { for (int x = 1 - bound; x <= bound; x++) { GLSL("idx = %s * rel.y + rel.x + %s * %d + %d; \n", iw_c, iw_c, y + offset, x + offset); polar_sample(sh, obj->filter, fn, src_tex, lut, cutoff_c, radius_c, x, y, comp_mask, in); } } } else { // Fragment shader sampling for (uint8_t comps = comp_mask; comps;) { uint8_t c = __builtin_ctz(comps); GLSL("vec4 in%d;\n", c); comps &= ~(1 << c); } // For maximum efficiency, we want to use textureGather() if // possible, rather than direct sampling. Since this is not // always possible/sensible, we need to possibly intermix gathering // with regular sampling. This requires keeping track of which // pixels in the next row were already gathered by the previous // row. uint32_t gathered_cur = 0x0, gathered_next = 0x0; const float radius2 = PL_SQUARE(obj->filter->radius_cutoff); const int base = bound - 1; if (base + bound >= 8 * sizeof(gathered_cur)) { SH_FAIL(sh, "Polar radius %f exceeds implementation capacity!", obj->filter->radius_cutoff); return false; } for (int y = 1 - bound; y <= bound; y++) { for (int x = 1 - bound; x <= bound; x++) { // Skip already gathered texels uint32_t bit = 1llu << (base + x); if (gathered_cur & bit) continue; // Using texture gathering is only more efficient than direct // sampling in the case where we expect to be able to use all // four gathered texels, without having to discard any. So // only do it if we suspect it will be a win rather than a // loss. int xx = x*x, xx1 = (x+1)*(x+1); int yy = y*y, yy1 = (y+1)*(y+1); bool use_gather = PL_MAX(xx, xx1) + PL_MAX(yy, yy1) < radius2; use_gather &= PL_MAX(x, y) <= sh_glsl(sh).max_gather_offset; use_gather &= PL_MIN(x, y) >= sh_glsl(sh).min_gather_offset; use_gather &= !src->tex || src->tex->params.format->gatherable; // Gathering from components other than the R channel requires // support for GLSL 400, which introduces the overload of // textureGather* that allows specifying the component. 
// // This is also the minimum requirement if we don't know the // texture format capabilities, for the sampler2D interface if (comp_mask != 0x1 || !src->tex) use_gather &= sh_glsl(sh).version >= 400; if (!use_gather) { // Switch to direct sampling instead polar_sample(sh, obj->filter, fn, src_tex, lut, cutoff_c, radius_c, x, y, comp_mask, NULL); continue; } // Gather the four surrounding texels simultaneously for (uint8_t comps = comp_mask; comps;) { uint8_t c = __builtin_ctz(comps); if (x || y) { if (c) { GLSL("in%d = textureGatherOffset(%s, center, " "ivec2(%d, %d), %d);\n", c, src_tex, x, y, c); } else { GLSL("in0 = textureGatherOffset(%s, center, " "ivec2(%d, %d));\n", src_tex, x, y); } } else { if (c) { GLSL("in%d = textureGather(%s, center, %d);\n", c, src_tex, c); } else { GLSL("in0 = textureGather(%s, center);\n", src_tex); } } comps &= ~(1 << c); } // Mix in all of the points with their weights for (int p = 0; p < 4; p++) { // The four texels are gathered counterclockwise starting // from the bottom left static const int xo[4] = {0, 1, 1, 0}; static const int yo[4] = {1, 1, 0, 0}; if (x+xo[p] > bound || y+yo[p] > bound) continue; // next subpixel GLSL("idx = %d;\n", p); polar_sample(sh, obj->filter, fn, src_tex, lut, cutoff_c, radius_c, x+xo[p], y+yo[p], comp_mask, "in"); } // Mark the other next row's pixels as already gathered gathered_next |= bit | (bit << 1); x++; // skip adjacent pixel } // Prepare for new row gathered_cur = gathered_next; gathered_next = 0; } } GLSL("color = vec4(%s / wsum) * color; \n", SH_FLOAT(scale)); if (!(comp_mask & (1 << PL_CHANNEL_A))) GLSL("color.a = 1.0; \n"); GLSL("}\n"); return true; } static void fill_ortho_lut(void *data, const struct sh_lut_params *params) { const struct sh_sampler_obj *obj = params->priv; pl_filter filt = obj->filter; size_t entries = filt->params.lut_entries * filt->row_stride; pl_assert(params->width * params->height * params->comps == entries); memcpy(data, filt->weights, entries * sizeof(float)); } bool pl_shader_sample_ortho(pl_shader sh, int pass, const struct pl_sample_src *src, const struct pl_sample_filter_params *params) { pl_assert(params); if (params->filter.polar) { SH_FAIL(sh, "Trying to use separated sampling with a polar filter?"); return false; } pl_gpu gpu = SH_GPU(sh); pl_assert(gpu); struct pl_sample_src srcfix = *src; switch (pass) { case PL_SEP_VERT: srcfix.rect.x0 = 0; srcfix.rect.x1 = srcfix.new_w = src_params(src).w; break; case PL_SEP_HORIZ: srcfix.rect.y0 = 0; srcfix.rect.y1 = srcfix.new_h = src_params(src).h; break; } uint8_t comp_mask; float ratio[PL_SEP_PASSES], scale; ident_t src_tex, pos, size, pt; const char *fn; if (!setup_src(sh, &srcfix, &src_tex, &pos, &size, &pt, &ratio[PL_SEP_HORIZ], &ratio[PL_SEP_VERT], &comp_mask, &scale, false, &fn, FASTEST)) return false; // We can store a separate sampler object per dimension, so dispatch the // right one. This is needed for two reasons: // 1. Anamorphic content can have a different scaling ratio for each // dimension. In particular, you could be upscaling in one and // downscaling in the other. // 2. After fixing the source for `setup_src`, we lose information about // the scaling ratio of the other component. 
(Although this is only a // minor reason and could easily be changed with some boilerplate) struct sh_sampler_obj *obj; obj = SH_OBJ(sh, params->lut, PL_SHADER_OBJ_SAMPLER, struct sh_sampler_obj, sh_sampler_uninit); if (!obj) return false; if (pass != 0) { obj = SH_OBJ(sh, &obj->pass2, PL_SHADER_OBJ_SAMPLER, struct sh_sampler_obj, sh_sampler_uninit); assert(obj); } float inv_scale = 1.0 / ratio[pass]; inv_scale = PL_MAX(inv_scale, 1.0); if (params->no_widening) inv_scale = 1.0; int lut_entries = PL_DEF(params->lut_entries, 64); bool update = !filter_compat(obj->filter, inv_scale, lut_entries, 0.0, ¶ms->filter); if (update) { pl_filter_free(&obj->filter); obj->filter = pl_filter_generate(sh->log, pl_filter_params( .config = params->filter, .lut_entries = lut_entries, .filter_scale = inv_scale, .max_row_size = gpu->limits.max_tex_2d_dim / 4, .row_stride_align = 4, )); if (!obj->filter) { // This should never happen, but just in case .. SH_FAIL(sh, "Failed initializing separated filter!"); return false; } } int N = obj->filter->row_size; // number of samples to convolve int width = obj->filter->row_stride / 4; // width of the LUT texture ident_t lut = sh_lut(sh, sh_lut_params( .object = &obj->lut, .type = PL_VAR_FLOAT, .width = width, .height = lut_entries, .comps = 4, .linear = true, .update = update, .fill = fill_ortho_lut, .priv = obj, )); if (!lut) { SH_FAIL(sh, "Failed initializing separated LUT!"); return false; } const int dir[PL_SEP_PASSES][2] = { [PL_SEP_HORIZ] = {1, 0}, [PL_SEP_VERT] = {0, 1}, }; static const char *names[PL_SEP_PASSES] = { [PL_SEP_HORIZ] = "ortho scaling (horiz)", [PL_SEP_VERT] = "ortho scaling (vert)", }; sh_describe(sh, names[pass]); GLSL("// pl_shader_sample_ortho \n" "vec4 color = vec4(0.0); \n" "{ \n" "vec2 pos = %s, size = %s, pt = %s; \n" "vec2 dir = vec2(%d.0, %d.0); \n" "pt *= dir; \n" "vec2 fcoord2 = fract(pos * size - vec2(0.5)); \n" "float fcoord = dot(fcoord2, dir); \n" "vec2 base = pos - fcoord * pt - pt * vec2(%d.0); \n" "float weight; \n" "vec4 ws, c; \n", pos, size, pt, dir[pass][0], dir[pass][1], N / 2 - 1); bool use_ar = params->antiring > 0; if (use_ar) { GLSL("vec4 hi = vec4(0.0); \n" "vec4 lo = vec4(1e9); \n"); } // Dispatch all of the samples GLSL("// scaler samples\n"); for (int n = 0; n < N; n++) { // Load the right weight for this instance. For every 4th weight, we // need to fetch another LUT entry. 
Otherwise, just use the previous if (n % 4 == 0) { float denom = PL_MAX(1, width - 1); // avoid division by zero GLSL("ws = %s(vec2(%f, fcoord));\n", lut, (n / 4) / denom); } GLSL("weight = ws[%d];\n", n % 4); // Load the input texel and add it to the running sum GLSL("c = %s(%s, base + pt * vec2(%d.0)); \n", fn, src_tex, n); for (uint8_t comps = comp_mask; comps;) { uint8_t c = __builtin_ctz(comps); GLSL("color[%d] += weight * c[%d]; \n", c, c); comps &= ~(1 << c); if (use_ar && (n == N / 2 - 1 || n == N / 2)) { GLSL("lo[%d] = min(lo[%d], c[%d]); \n" "hi[%d] = max(hi[%d], c[%d]); \n", c, c, c, c, c, c); } } } if (use_ar) { GLSL("color = mix(color, clamp(color, lo, hi), %s);\n", sh_const_float(sh, "antiring", params->antiring)); } GLSL("color *= vec4(%s);\n", SH_FLOAT(scale)); if (!(comp_mask & (1 << PL_CHANNEL_A))) GLSL("color.a = 1.0; \n"); GLSL("}\n"); return true; } libplacebo-v4.192.1/src/siphash.c000066400000000000000000000074771417677245700166070ustar00rootroot00000000000000/* SipHash reference C implementation Modified for use by libplacebo: - Hard-coded a fixed key (k0 and k1) - Hard-coded the output size to 64 bits - Return the result vector directly Copyright (c) 2012-2016 Jean-Philippe Aumasson Copyright (c) 2012-2014 Daniel J. Bernstein To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. . */ #include "common.h" /* default: SipHash-2-4 */ #define cROUNDS 2 #define dROUNDS 4 #define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) #define U8TO64_LE(p) \ (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) #define SIPROUND \ do { \ v0 += v1; \ v1 = ROTL(v1, 13); \ v1 ^= v0; \ v0 = ROTL(v0, 32); \ v2 += v3; \ v3 = ROTL(v3, 16); \ v3 ^= v2; \ v0 += v3; \ v3 = ROTL(v3, 21); \ v3 ^= v0; \ v2 += v1; \ v1 = ROTL(v1, 17); \ v1 ^= v2; \ v2 = ROTL(v2, 32); \ } while (0) uint64_t pl_mem_hash(const void *mem, size_t size) { if (!size) return 0x8533321381b8254bULL; uint64_t v0 = 0x736f6d6570736575ULL; uint64_t v1 = 0x646f72616e646f6dULL; uint64_t v2 = 0x6c7967656e657261ULL; uint64_t v3 = 0x7465646279746573ULL; uint64_t k0 = 0xfe9f075098ddb0faULL; uint64_t k1 = 0x68f7f03510e5285cULL; uint64_t m; int i; const uint8_t *buf = mem; const uint8_t *end = buf + size - (size % sizeof(uint64_t)); const int left = size & 7; uint64_t b = ((uint64_t) size) << 56; v3 ^= k1; v2 ^= k0; v1 ^= k1; v0 ^= k0; for (; buf != end; buf += 8) { m = U8TO64_LE(buf); v3 ^= m; for (i = 0; i < cROUNDS; ++i) SIPROUND; v0 ^= m; } switch (left) { case 7: b |= ((uint64_t) buf[6]) << 48; // fall through case 6: b |= ((uint64_t) buf[5]) << 40; // fall through case 5: b |= ((uint64_t) buf[4]) << 32; // fall through case 4: b |= ((uint64_t) buf[3]) << 24; // fall through case 3: b |= ((uint64_t) buf[2]) << 16; // fall through case 2: b |= ((uint64_t) buf[1]) << 8; // fall through case 1: b |= ((uint64_t) buf[0]); break; case 0: break; } v3 ^= b; for (i = 0; i < cROUNDS; ++i) SIPROUND; v0 ^= b; v2 ^= 0xff; for (i = 0; i < dROUNDS; ++i) SIPROUND; b = v0 ^ v1 ^ v2 ^ v3; return b; } libplacebo-v4.192.1/src/swapchain.c000066400000000000000000000052751417677245700171170ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include "log.h" #include "swapchain.h" void pl_swapchain_destroy(pl_swapchain *ptr) { pl_swapchain sw = *ptr; if (!sw) return; sw->impl->destroy(sw); *ptr = NULL; } int pl_swapchain_latency(pl_swapchain sw) { if (!sw->impl->latency) return 0; return sw->impl->latency(sw); } bool pl_swapchain_resize(pl_swapchain sw, int *width, int *height) { int dummy[2] = {0}; width = PL_DEF(width, &dummy[0]); height = PL_DEF(height, &dummy[1]); if (!sw->impl->resize) { *width = *height = 0; return true; } return sw->impl->resize(sw, width, height); } void pl_swapchain_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp) { if (!sw->impl->colorspace_hint) return; struct pl_swapchain_colors fix = {0}; if (csp) { fix = *csp; bool has_metadata = !pl_hdr_metadata_equal(&fix.hdr, &pl_hdr_metadata_empty); bool is_hdr = pl_color_transfer_is_hdr(fix.transfer); // Ensure consistency of the metadata and requested transfer function if (has_metadata && !fix.transfer) { fix.transfer = PL_COLOR_TRC_PQ; } else if (has_metadata && !is_hdr) { fix.hdr = pl_hdr_metadata_empty; } else if (!has_metadata && is_hdr) { fix.hdr = pl_hdr_metadata_hdr10; } } sw->impl->colorspace_hint(sw, &fix); } bool pl_swapchain_hdr_metadata(pl_swapchain sw, const struct pl_hdr_metadata *metadata) { if (metadata) pl_swapchain_colorspace_hint(sw, pl_color_space( .hdr = *metadata )); return true; } bool pl_swapchain_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame) { *out_frame = (struct pl_swapchain_frame) {0}; // sanity return sw->impl->start_frame(sw, out_frame); } bool pl_swapchain_submit_frame(pl_swapchain sw) { return sw->impl->submit_frame(sw); } void pl_swapchain_swap_buffers(pl_swapchain sw) { sw->impl->swap_buffers(sw); } libplacebo-v4.192.1/src/swapchain.h000066400000000000000000000021441417677245700171140ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include "common.h" #define SW_PFN(name) __typeof__(pl_swapchain_##name) *name struct pl_sw_fns { // This destructor follows the same rules as `pl_gpu_fns` void (*destroy)(pl_swapchain sw); SW_PFN(latency); // optional SW_PFN(resize); // optional SW_PFN(colorspace_hint); // optional SW_PFN(start_frame); SW_PFN(submit_frame); SW_PFN(swap_buffers); }; #undef SW_PFN libplacebo-v4.192.1/src/tests/000077500000000000000000000000001417677245700161275ustar00rootroot00000000000000libplacebo-v4.192.1/src/tests/bench.c000066400000000000000000000332331417677245700173560ustar00rootroot00000000000000#include "tests.h" #include #define TEX_SIZE 2048 #define CUBE_SIZE 64 #define NUM_FBOS 16 #define BENCH_DUR 3 static pl_tex create_test_img(pl_gpu gpu) { pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, PL_FMT_CAP_LINEAR); REQUIRE(fmt); int cube_stride = TEX_SIZE / CUBE_SIZE; int cube_count = cube_stride * cube_stride; assert(cube_count * CUBE_SIZE * CUBE_SIZE == TEX_SIZE * TEX_SIZE); float *data = malloc(TEX_SIZE * TEX_SIZE * sizeof(float[4])); for (int n = 0; n < cube_count; n++) { int xbase = (n % cube_stride) * CUBE_SIZE; int ybase = (n / cube_stride) * CUBE_SIZE; for (int g = 0; g < CUBE_SIZE; g++) { for (int r = 0; r < CUBE_SIZE; r++) { int xpos = xbase + r; int ypos = ybase + g; assert(xpos < TEX_SIZE && ypos < TEX_SIZE); float *color = &data[(ypos * TEX_SIZE + xpos) * 4]; color[0] = (float) r / CUBE_SIZE; color[1] = (float) g / CUBE_SIZE; color[2] = (float) n / cube_count; color[3] = 1.0; } } } pl_tex tex = pl_tex_create(gpu, pl_tex_params( .format = fmt, .w = TEX_SIZE, .h = TEX_SIZE, .sampleable = true, .initial_data = data, )); free(data); REQUIRE(tex); return tex; } struct bench { void (*run_sh)(pl_shader sh, pl_shader_obj *state, pl_tex src); void (*run_tex)(pl_gpu gpu, pl_tex tex); }; static void run_bench(pl_gpu gpu, pl_dispatch dp, pl_shader_obj *state, pl_tex src, pl_tex fbo, pl_timer timer, const struct bench *bench) { if (bench->run_sh) { pl_shader sh = pl_dispatch_begin(dp); bench->run_sh(sh, state, src); pl_dispatch_finish(dp, pl_dispatch_params( .shader = &sh, .target = fbo, .timer = timer, )); } else { bench->run_tex(gpu, fbo); } } static void benchmark(pl_gpu gpu, const char *name, const struct bench *bench) { pl_dispatch dp = pl_dispatch_create(gpu->log, gpu); pl_shader_obj state = NULL; pl_tex src = create_test_img(gpu); // Create the FBOs pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE); REQUIRE(fmt); pl_tex fbos[NUM_FBOS] = {0}; for (int i = 0; i < NUM_FBOS; i++) { fbos[i] = pl_tex_create(gpu, pl_tex_params( .format = fmt, .w = TEX_SIZE, .h = TEX_SIZE, .renderable = true, .blit_dst = true, .host_writable = true, .host_readable = true, .storable = !!(fmt->caps & PL_FMT_CAP_STORABLE), )); REQUIRE(fbos[i]); pl_tex_clear(gpu, fbos[i], (float[4]){ 0.0 }); } // Run the benchmark and flush+block once to force shader compilation etc. 
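    // (pl_gpu_finish() below blocks until all submitted work completes, so the
    // one-time cost of compiling shaders/pipelines during this warm-up pass is
    // kept out of the timed measurement loop that follows.)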
run_bench(gpu, dp, &state, src, fbos[0], NULL, bench); pl_gpu_finish(gpu); // Perform the actual benchmark struct timeval start = {0}, stop = {0}; unsigned long frames = 0; int index = 0; pl_timer timer = pl_timer_create(gpu); uint64_t gputime_total = 0; unsigned long gputime_count = 0; uint64_t gputime; gettimeofday(&start, NULL); do { frames++; run_bench(gpu, dp, &state, src, fbos[index++], timer, bench); index %= NUM_FBOS; if (index == 0) { pl_gpu_flush(gpu); gettimeofday(&stop, NULL); } while ((gputime = pl_timer_query(gpu, timer))) { gputime_total += gputime; gputime_count++; } } while (stop.tv_sec - start.tv_sec < BENCH_DUR); // Force the GPU to finish execution and re-measure the final stop time pl_gpu_finish(gpu); gettimeofday(&stop, NULL); while ((gputime = pl_timer_query(gpu, timer))) { gputime_total += gputime; gputime_count++; } float secs = (float) (stop.tv_sec - start.tv_sec) + 1e-6 * (stop.tv_usec - start.tv_usec); printf("'%s':\t%4lu frames in %1.6f seconds => %2.6f ms/frame (%5.2f FPS)", name, frames, secs, 1000 * secs / frames, frames / secs); if (gputime_count) printf(", gpu time: %2.6f ms", 1e-6 * (gputime_total / gputime_count)); printf("\n"); pl_timer_destroy(gpu, &timer); pl_shader_obj_destroy(&state); pl_dispatch_destroy(&dp); pl_tex_destroy(gpu, &src); for (int i = 0; i < NUM_FBOS; i++) pl_tex_destroy(gpu, &fbos[i]); } // List of benchmarks static void bench_deband(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_deband(sh, pl_sample_src( .tex = src ), NULL); } static void bench_deband_heavy(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_deband(sh, pl_sample_src( .tex = src ), pl_deband_params( .iterations = 4, .threshold = 4.0, .radius = 4.0, .grain = 16.0, )); } static void bench_bilinear(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_sample_bilinear(sh, pl_sample_src( .tex = src )); } static void bench_bicubic(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_sample_bicubic(sh, pl_sample_src( .tex = src )); } static void bench_dither_blue(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_dither(sh, 8, state, pl_dither_params( .method = PL_DITHER_BLUE_NOISE, )); } static void bench_dither_white(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_dither(sh, 8, state, pl_dither_params( .method = PL_DITHER_WHITE_NOISE, )); } static void bench_dither_ordered_fix(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_dither(sh, 8, state, pl_dither_params( .method = PL_DITHER_ORDERED_FIXED, )); } static void bench_polar(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_sample_filter_params params = { .filter = pl_filter_ewa_lanczos, .lut = state, }; pl_shader_sample_polar(sh, pl_sample_src( .tex = src ), ¶ms); } static void bench_polar_nocompute(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_sample_filter_params params = { .filter = pl_filter_ewa_lanczos, .no_compute = true, .lut = state, }; pl_shader_sample_polar(sh, pl_sample_src( .tex = src ), ¶ms); } static void bench_hdr_peak(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_detect_peak(sh, pl_color_space_hdr10, state, NULL); } static void bench_hdr_lut(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_color_map_params params = { PL_COLOR_MAP_DEFAULTS .tone_mapping_function = 
&pl_tone_map_linear, .tone_mapping_mode = PL_TONE_MAP_RGB, }; pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_color_map(sh, ¶ms, pl_color_space_hdr10, pl_color_space_monitor, state, false); } static void bench_hdr_clip(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_color_map_params params = { PL_COLOR_MAP_DEFAULTS .tone_mapping_function = &pl_tone_map_clip, .tone_mapping_mode = PL_TONE_MAP_RGB, }; pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_color_map(sh, ¶ms, pl_color_space_hdr10, pl_color_space_monitor, state, false); } static void bench_av1_grain(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_film_grain_params params = { .data = { .type = PL_FILM_GRAIN_AV1, .params.av1 = av1_grain_data, .seed = rand(), }, .tex = src, .components = 3, .component_mapping = {0, 1, 2}, .repr = &(struct pl_color_repr) {0}, }; pl_shader_film_grain(sh, state, ¶ms); } static void bench_av1_grain_lap(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_film_grain_params params = { .data = { .type = PL_FILM_GRAIN_AV1, .params.av1 = av1_grain_data, .seed = rand(), }, .tex = src, .components = 3, .component_mapping = {0, 1, 2}, .repr = &(struct pl_color_repr) {0}, }; params.data.params.av1.overlap = true; pl_shader_film_grain(sh, state, ¶ms); } static void bench_h274_grain(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_film_grain_params params = { .data = { .type = PL_FILM_GRAIN_H274, .params.h274 = h274_grain_data, .seed = rand(), }, .tex = src, .components = 3, .component_mapping = {0, 1, 2}, .repr = &(struct pl_color_repr) {0}, }; pl_shader_film_grain(sh, state, ¶ms); } static void bench_reshape_poly(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_dovi_reshape(sh, &(struct pl_dovi_metadata) { .comp = { { .num_pivots = 8, .pivots = {0.0, 0.00488758553, 0.0420332365, 0.177908108, 0.428152502, 0.678396881, 0.92864126, 1.0}, .method = {0, 0, 0, 0, 0, 0, 0}, .poly_coeffs = { {0.00290930271, 2.30019712, 50.1446037}, {0.00725257397, 1.88119054, -4.49443769}, {0.0150123835, 1.61106598, -1.64833081}, {0.0498571396, 1.2059114, -0.430627108}, {0.0878019333, 1.01845241, -0.19669354}, {0.120447636, 0.920134187, -0.122338772}, {2.12430835, -3.30913281, 2.10893941}, }, }, { .num_pivots = 2, .pivots = {0.0, 1.0}, .method = {0}, .poly_coeffs = {{-0.397901177, 1.85908031, 0}}, }, { .num_pivots = 2, .pivots = {0.0, 1.0}, .method = {0}, .poly_coeffs = {{-0.399355531, 1.85591626, 0}}, }, }}); } static void bench_reshape_mmr(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_dovi_reshape(sh, &dovi_meta); // this includes MMR } static float data[TEX_SIZE * TEX_SIZE * 4 + 8192]; static void bench_download(pl_gpu gpu, pl_tex tex) { pl_tex_download(gpu, pl_tex_transfer_params( .tex = tex, .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), )); } static void bench_upload(pl_gpu gpu, pl_tex tex) { pl_tex_upload(gpu, pl_tex_transfer_params( .tex = tex, .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), )); } static void dummy_cb(void *arg) {} static void bench_download_async(pl_gpu gpu, pl_tex tex) { pl_tex_download(gpu, pl_tex_transfer_params( .tex = tex, .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), .callback = dummy_cb, )); } static void bench_upload_async(pl_gpu gpu, pl_tex tex) { pl_tex_upload(gpu, pl_tex_transfer_params( .tex = tex, .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), .callback = 
dummy_cb, )); } int main() { setbuf(stdout, NULL); setbuf(stderr, NULL); pl_log log = pl_log_create(PL_API_VER, pl_log_params( .log_cb = isatty(fileno(stdout)) ? pl_log_color : pl_log_simple, .log_level = PL_LOG_WARN, )); pl_vulkan vk = pl_vulkan_create(log, pl_vulkan_params( .allow_software = true, .async_transfer = false, .queue_count = NUM_FBOS, )); if (!vk) return SKIP; #define BENCH_SH(fn) &(struct bench) { .run_sh = fn } #define BENCH_TEX(fn) &(struct bench) { .run_tex = fn } printf("= Running benchmarks =\n"); benchmark(vk->gpu, "tex_download ptr", BENCH_TEX(bench_download)); benchmark(vk->gpu, "tex_download ptr async", BENCH_TEX(bench_download_async)); benchmark(vk->gpu, "tex_upload ptr", BENCH_TEX(bench_upload)); benchmark(vk->gpu, "tex_upload ptr async", BENCH_TEX(bench_upload_async)); benchmark(vk->gpu, "bilinear", BENCH_SH(bench_bilinear)); benchmark(vk->gpu, "bicubic", BENCH_SH(bench_bicubic)); benchmark(vk->gpu, "deband", BENCH_SH(bench_deband)); benchmark(vk->gpu, "deband_heavy", BENCH_SH(bench_deband_heavy)); // Polar sampling benchmark(vk->gpu, "polar", BENCH_SH(bench_polar)); if (vk->gpu->glsl.compute) benchmark(vk->gpu, "polar_nocompute", BENCH_SH(bench_polar_nocompute)); // Dithering algorithms benchmark(vk->gpu, "dither_blue", BENCH_SH(bench_dither_blue)); benchmark(vk->gpu, "dither_white", BENCH_SH(bench_dither_white)); benchmark(vk->gpu, "dither_ordered_fixed", BENCH_SH(bench_dither_ordered_fix)); // HDR peak detection if (vk->gpu->glsl.compute) benchmark(vk->gpu, "hdr_peakdetect", BENCH_SH(bench_hdr_peak)); // Tone mapping benchmark(vk->gpu, "hdr_lut", BENCH_SH(bench_hdr_lut)); benchmark(vk->gpu, "hdr_clip", BENCH_SH(bench_hdr_clip)); // Misc stuff benchmark(vk->gpu, "av1_grain", BENCH_SH(bench_av1_grain)); benchmark(vk->gpu, "av1_grain_lap", BENCH_SH(bench_av1_grain_lap)); benchmark(vk->gpu, "h274_grain", BENCH_SH(bench_h274_grain)); benchmark(vk->gpu, "reshape_poly", BENCH_SH(bench_reshape_poly)); benchmark(vk->gpu, "reshape_mmr", BENCH_SH(bench_reshape_mmr)); pl_vulkan_destroy(&vk); pl_log_destroy(&log); return 0; } libplacebo-v4.192.1/src/tests/colorspace.c000066400000000000000000000322701417677245700204310ustar00rootroot00000000000000#include "tests.h" int main() { for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { bool ycbcr = sys >= PL_COLOR_SYSTEM_BT_601 && sys <= PL_COLOR_SYSTEM_YCGCO; REQUIRE(ycbcr == pl_color_system_is_ycbcr_like(sys)); } for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { bool hdr = trc >= PL_COLOR_TRC_PQ && trc <= PL_COLOR_TRC_S_LOG2; REQUIRE(hdr == pl_color_transfer_is_hdr(trc)); REQUIRE(pl_color_transfer_nominal_peak(trc) >= 1.0); } float pq_peak = pl_color_transfer_nominal_peak(PL_COLOR_TRC_PQ); REQUIRE(feq(PL_COLOR_SDR_WHITE * pq_peak, 10000, 1e-7)); struct pl_color_repr tv_repr = { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_LIMITED, }; struct pl_color_repr pc_repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, }; // Ensure this is a no-op for bits == bits for (int bits = 1; bits <= 16; bits++) { tv_repr.bits.color_depth = tv_repr.bits.sample_depth = bits; pc_repr.bits.color_depth = pc_repr.bits.sample_depth = bits; REQUIRE(feq(pl_color_repr_normalize(&tv_repr), 1.0, 1e-7)); REQUIRE(feq(pl_color_repr_normalize(&pc_repr), 1.0, 1e-7)); } tv_repr.bits.color_depth = 8; tv_repr.bits.sample_depth = 10; float tv8to10 = pl_color_repr_normalize(&tv_repr); tv_repr.bits.color_depth = 8; tv_repr.bits.sample_depth = 12; float tv8to12 = pl_color_repr_normalize(&tv_repr); // 
Simulate the effect of GPU texture sampling on UNORM texture REQUIRE(feq(tv8to10 * 16 /1023., 64/1023., 1e-7)); // black REQUIRE(feq(tv8to10 * 235/1023., 940/1023., 1e-7)); // nominal white REQUIRE(feq(tv8to10 * 128/1023., 512/1023., 1e-7)); // achromatic REQUIRE(feq(tv8to10 * 240/1023., 960/1023., 1e-7)); // nominal chroma peak REQUIRE(feq(tv8to12 * 16 /4095., 256 /4095., 1e-7)); // black REQUIRE(feq(tv8to12 * 235/4095., 3760/4095., 1e-7)); // nominal white REQUIRE(feq(tv8to12 * 128/4095., 2048/4095., 1e-7)); // achromatic REQUIRE(feq(tv8to12 * 240/4095., 3840/4095., 1e-7)); // nominal chroma peak // Ensure lavc's xyz12 is handled correctly struct pl_color_repr xyz12 = { .sys = PL_COLOR_SYSTEM_XYZ, .levels = PL_COLOR_LEVELS_UNKNOWN, .bits = { .sample_depth = 16, .color_depth = 12, .bit_shift = 4, }, }; float xyz = pl_color_repr_normalize(&xyz12); REQUIRE(feq(xyz * (4095 << 4), 65535, 1e-7)); // Assume we uploaded a 10-bit source directly (unshifted) as a 16-bit // texture. This texture multiplication factor should make it behave as if // it was uploaded as a 10-bit texture instead. pc_repr.bits.color_depth = 10; pc_repr.bits.sample_depth = 16; float pc10to16 = pl_color_repr_normalize(&pc_repr); REQUIRE(feq(pc10to16 * 1000/65535., 1000/1023., 1e-7)); const struct pl_raw_primaries *bt709, *bt2020; bt709 = pl_raw_primaries_get(PL_COLOR_PRIM_BT_709); bt2020 = pl_raw_primaries_get(PL_COLOR_PRIM_BT_2020); struct pl_matrix3x3 rgb2xyz, rgb2xyz_; rgb2xyz = rgb2xyz_ = pl_get_rgb2xyz_matrix(bt709); pl_matrix3x3_invert(&rgb2xyz_); pl_matrix3x3_invert(&rgb2xyz_); // Make sure the double-inversion round trips for (int y = 0; y < 3; y++) { for (int x = 0; x < 3; x++) REQUIRE(feq(rgb2xyz.m[y][x], rgb2xyz_.m[y][x], 1e-6)); } // Make sure mapping the spectral RGB colors (i.e. the matrix rows) matches // our original primaries float Y = rgb2xyz.m[1][0]; REQUIRE(feq(rgb2xyz.m[0][0], pl_cie_X(bt709->red) * Y, 1e-7)); REQUIRE(feq(rgb2xyz.m[2][0], pl_cie_Z(bt709->red) * Y, 1e-7)); Y = rgb2xyz.m[1][1]; REQUIRE(feq(rgb2xyz.m[0][1], pl_cie_X(bt709->green) * Y, 1e-7)); REQUIRE(feq(rgb2xyz.m[2][1], pl_cie_Z(bt709->green) * Y, 1e-7)); Y = rgb2xyz.m[1][2]; REQUIRE(feq(rgb2xyz.m[0][2], pl_cie_X(bt709->blue) * Y, 1e-7)); REQUIRE(feq(rgb2xyz.m[2][2], pl_cie_Z(bt709->blue) * Y, 1e-7)); // Make sure the gamut mapping round-trips struct pl_matrix3x3 bt709_bt2020, bt2020_bt709; bt709_bt2020 = pl_get_color_mapping_matrix(bt709, bt2020, PL_INTENT_RELATIVE_COLORIMETRIC); bt2020_bt709 = pl_get_color_mapping_matrix(bt2020, bt709, PL_INTENT_RELATIVE_COLORIMETRIC); for (int n = 0; n < 10; n++) { float vec[3] = { RANDOM, RANDOM, RANDOM }; float dst[3] = { vec[0], vec[1], vec[2] }; pl_matrix3x3_apply(&bt709_bt2020, dst); pl_matrix3x3_apply(&bt2020_bt709, dst); for (int i = 0; i < 3; i++) REQUIRE(feq(dst[i], vec[i], 1e-6)); } // Ensure the decoding matrix round-trips to white/black for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { if (!pl_color_system_is_linear(sys)) continue; printf("testing color system %u\n", (unsigned) sys); struct pl_color_repr repr = { .levels = PL_COLOR_LEVELS_LIMITED, .sys = sys, .bits = { // synthetic test .color_depth = 8, .sample_depth = 10, }, }; float scale = pl_color_repr_normalize(&repr); struct pl_transform3x3 yuv2rgb = pl_color_repr_decode(&repr, NULL); pl_matrix3x3_scale(&yuv2rgb.mat, scale); static const float white_ycbcr[3] = { 235/1023., 128/1023., 128/1023. }; static const float black_ycbcr[3] = { 16/1023., 128/1023., 128/1023. 
}; static const float white_other[3] = { 235/1023., 235/1023., 235/1023. }; static const float black_other[3] = { 16/1023., 16/1023., 16/1023. }; float white[3], black[3]; for (int i = 0; i < 3; i++) { if (pl_color_system_is_ycbcr_like(sys)) { white[i] = white_ycbcr[i]; black[i] = black_ycbcr[i]; } else { white[i] = white_other[i]; black[i] = black_other[i]; } } pl_transform3x3_apply(&yuv2rgb, white); REQUIRE(feq(white[0], 1.0, 1e-6)); REQUIRE(feq(white[1], 1.0, 1e-6)); REQUIRE(feq(white[2], 1.0, 1e-6)); pl_transform3x3_apply(&yuv2rgb, black); REQUIRE(feq(black[0], 0.0, 1e-6)); REQUIRE(feq(black[1], 0.0, 1e-6)); REQUIRE(feq(black[2], 0.0, 1e-6)); } // Make sure chromatic adaptation works struct pl_raw_primaries bt709_d50; bt709_d50 = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_709); bt709_d50.white = (struct pl_cie_xy) { 0.34567, 0.35850 }; struct pl_matrix3x3 d50_d65; d50_d65 = pl_get_color_mapping_matrix(&bt709_d50, bt709, PL_INTENT_RELATIVE_COLORIMETRIC); float white[3] = { 1.0, 1.0, 1.0 }; pl_matrix3x3_apply(&d50_d65, white); REQUIRE(feq(white[0], 1.0, 1e-6) && feq(white[1], 1.0, 1e-6) && feq(white[2], 1.0, 1e-6)); // Simulate a typical 10-bit YCbCr -> 16 bit texture conversion tv_repr.bits.color_depth = 10; tv_repr.bits.sample_depth = 16; struct pl_transform3x3 yuv2rgb; yuv2rgb = pl_color_repr_decode(&tv_repr, NULL); float test[3] = { 575/65535., 336/65535., 640/65535. }; pl_transform3x3_apply(&yuv2rgb, test); REQUIRE(feq(test[0], 0.808305, 1e-6)); REQUIRE(feq(test[1], 0.553254, 1e-6)); REQUIRE(feq(test[2], 0.218841, 1e-6)); // DVD REQUIRE(pl_color_system_guess_ycbcr(720, 480) == PL_COLOR_SYSTEM_BT_601); REQUIRE(pl_color_system_guess_ycbcr(720, 576) == PL_COLOR_SYSTEM_BT_601); REQUIRE(pl_color_primaries_guess(720, 576) == PL_COLOR_PRIM_BT_601_625); REQUIRE(pl_color_primaries_guess(720, 480) == PL_COLOR_PRIM_BT_601_525); // PAL 16:9 REQUIRE(pl_color_system_guess_ycbcr(1024, 576) == PL_COLOR_SYSTEM_BT_601); REQUIRE(pl_color_primaries_guess(1024, 576) == PL_COLOR_PRIM_BT_601_625); // HD REQUIRE(pl_color_system_guess_ycbcr(1280, 720) == PL_COLOR_SYSTEM_BT_709); REQUIRE(pl_color_system_guess_ycbcr(1920, 1080) == PL_COLOR_SYSTEM_BT_709); REQUIRE(pl_color_primaries_guess(1280, 720) == PL_COLOR_PRIM_BT_709); REQUIRE(pl_color_primaries_guess(1920, 1080) == PL_COLOR_PRIM_BT_709); // Odd/weird videos REQUIRE(pl_color_primaries_guess(2000, 576) == PL_COLOR_PRIM_BT_709); REQUIRE(pl_color_primaries_guess(200, 200) == PL_COLOR_PRIM_BT_709); REQUIRE(pl_color_repr_equal(&pl_color_repr_sdtv, &pl_color_repr_sdtv)); REQUIRE(!pl_color_repr_equal(&pl_color_repr_sdtv, &pl_color_repr_hdtv)); struct pl_color_repr repr = pl_color_repr_unknown; pl_color_repr_merge(&repr, &pl_color_repr_uhdtv); REQUIRE(pl_color_repr_equal(&repr, &pl_color_repr_uhdtv)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_UNKNOWN)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_601_525)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_601_625)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_709)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_470M)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_2020)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_APPLE)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_ADOBE)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_PRO_PHOTO)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_CIE_1931)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_DCI_P3)); 
REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_DISPLAY_P3)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_V_GAMUT)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_S_GAMUT)); REQUIRE(!pl_color_light_is_scene_referred(PL_COLOR_LIGHT_UNKNOWN)); REQUIRE(!pl_color_light_is_scene_referred(PL_COLOR_LIGHT_DISPLAY)); REQUIRE(pl_color_light_is_scene_referred(PL_COLOR_LIGHT_SCENE_HLG)); REQUIRE(pl_color_light_is_scene_referred(PL_COLOR_LIGHT_SCENE_709_1886)); REQUIRE(pl_color_light_is_scene_referred(PL_COLOR_LIGHT_SCENE_1_2)); struct pl_color_space space = pl_color_space_unknown; pl_color_space_merge(&space, &pl_color_space_bt709); REQUIRE(pl_color_space_equal(&space, &pl_color_space_bt709)); // Infer some color spaces struct pl_color_space hlg = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_HLG, }; pl_color_space_infer(&hlg); REQUIRE(hlg.hdr.max_luma == 1000.0f); struct pl_color_space unknown = {0}; struct pl_color_space display = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_BT_1886, }; pl_color_space_infer(&unknown); pl_color_space_infer(&display); REQUIRE(pl_color_space_equal(&unknown, &display)); float x, y; pl_chroma_location_offset(PL_CHROMA_LEFT, &x, &y); REQUIRE(x == -0.5 && y == 0.0); pl_chroma_location_offset(PL_CHROMA_TOP_LEFT, &x, &y); REQUIRE(x == -0.5 && y == -0.5); pl_chroma_location_offset(PL_CHROMA_CENTER, &x, &y); REQUIRE(x == 0.0 && y == 0.0); pl_chroma_location_offset(PL_CHROMA_BOTTOM_CENTER, &x, &y); REQUIRE(x == 0.0 && y == 0.5); REQUIRE(pl_raw_primaries_get(PL_COLOR_PRIM_UNKNOWN) == pl_raw_primaries_get(PL_COLOR_PRIM_BT_709)); // Color blindness tests float red[3] = { 1.0, 0.0, 0.0 }; float green[3] = { 0.0, 1.0, 0.0 }; float blue[3] = { 0.0, 0.0, 1.0 }; #define TEST_CONE(model, color) \ do { \ float tmp[3] = { (color)[0], (color)[1], (color)[2] }; \ struct pl_matrix3x3 mat = pl_get_cone_matrix(&(model), bt709); \ pl_matrix3x3_apply(&mat, tmp); \ printf("%s + %s = %f %f %f\n", #model, #color, tmp[0], tmp[1], tmp[2]); \ for (int i = 0; i < 3; i++) \ REQUIRE(fabs((color)[i] - tmp[i]) < 1e-6); \ } while(0) struct pl_cone_params red_only = { .cones = PL_CONE_MS }; struct pl_cone_params green_only = { .cones = PL_CONE_LS }; struct pl_cone_params blue_only = pl_vision_monochromacy; // These models should all round-trip white TEST_CONE(pl_vision_normal, white); TEST_CONE(pl_vision_protanopia, white); TEST_CONE(pl_vision_protanomaly, white); TEST_CONE(pl_vision_deuteranomaly, white); TEST_CONE(pl_vision_tritanomaly, white); TEST_CONE(pl_vision_achromatopsia, white); TEST_CONE(red_only, white); TEST_CONE(green_only, white); TEST_CONE(blue_only, white); // These models should round-trip blue TEST_CONE(pl_vision_normal, blue); TEST_CONE(pl_vision_protanomaly, blue); TEST_CONE(pl_vision_deuteranomaly, blue); // These models should round-trip red TEST_CONE(pl_vision_normal, red); TEST_CONE(pl_vision_tritanomaly, red); TEST_CONE(pl_vision_tritanopia, red); // These models should round-trip green TEST_CONE(pl_vision_normal, green); // Color adaptation tests struct pl_cie_xy d65 = pl_white_from_temp(6504); struct pl_cie_xy d55 = pl_white_from_temp(5503); REQUIRE(feq(d65.x, 0.31271, 1e-3) && feq(d65.y, 0.32902, 1e-3)); REQUIRE(feq(d55.x, 0.33242, 1e-3) && feq(d55.y, 0.34743, 1e-3)); } libplacebo-v4.192.1/src/tests/common.c000066400000000000000000000102511417677245700175620ustar00rootroot00000000000000#include "tests.h" static int irand() { return rand() - RAND_MAX / 2; } int main() { pl_log log = pl_test_logger(); pl_log_update(log, 
NULL); pl_log_destroy(&log); // Test some misc helper functions struct pl_rect2d rc2 = { irand(), irand(), irand(), irand(), }; struct pl_rect3d rc3 = { irand(), irand(), irand(), irand(), irand(), irand(), }; pl_rect2d_normalize(&rc2); REQUIRE(rc2.x1 >= rc2.x0); REQUIRE(rc2.y1 >= rc2.y0); pl_rect3d_normalize(&rc3); REQUIRE(rc3.x1 >= rc3.x0); REQUIRE(rc3.y1 >= rc3.y0); REQUIRE(rc3.z1 >= rc3.z0); struct pl_rect2df rc2f = { RANDOM, RANDOM, RANDOM, RANDOM, }; struct pl_rect3df rc3f = { RANDOM, RANDOM, RANDOM, RANDOM, RANDOM, RANDOM, }; pl_rect2df_normalize(&rc2f); REQUIRE(rc2.x1 >= rc2.x0); REQUIRE(rc2.y1 >= rc2.y0); pl_rect3df_normalize(&rc3f); REQUIRE(rc3.x1 >= rc3.x0); REQUIRE(rc3.y1 >= rc3.y0); REQUIRE(rc3.z1 >= rc3.z0); struct pl_rect2d rc2r = pl_rect2df_round(&rc2f); struct pl_rect3d rc3r = pl_rect3df_round(&rc3f); REQUIRE(fabs(rc2r.x0 - rc2f.x0) <= 0.5); REQUIRE(fabs(rc2r.x1 - rc2f.x1) <= 0.5); REQUIRE(fabs(rc2r.y0 - rc2f.y0) <= 0.5); REQUIRE(fabs(rc2r.y1 - rc2f.y1) <= 0.5); REQUIRE(fabs(rc3r.x0 - rc3f.x0) <= 0.5); REQUIRE(fabs(rc3r.x1 - rc3f.x1) <= 0.5); REQUIRE(fabs(rc3r.y0 - rc3f.y0) <= 0.5); REQUIRE(fabs(rc3r.y1 - rc3f.y1) <= 0.5); REQUIRE(fabs(rc3r.z0 - rc3f.z0) <= 0.5); REQUIRE(fabs(rc3r.z1 - rc3f.z1) <= 0.5); struct pl_transform3x3 tr = { .mat = {{ { RANDOM, RANDOM, RANDOM }, { RANDOM, RANDOM, RANDOM }, { RANDOM, RANDOM, RANDOM }, }}, .c = { RANDOM, RANDOM, RANDOM }, }; struct pl_transform3x3 tr2 = tr; float scale = 1.0 + RANDOM; pl_transform3x3_scale(&tr2, scale); pl_transform3x3_invert(&tr2); pl_transform3x3_invert(&tr2); pl_transform3x3_scale(&tr2, 1.0 / scale); for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { printf("%f %f\n", tr.mat.m[i][j], tr2.mat.m[i][j]); REQUIRE(feq(tr.mat.m[i][j], tr2.mat.m[i][j], 1e-4)); } REQUIRE(feq(tr.c[i], tr2.c[i], 1e-4)); } // Test aspect ratio code const struct pl_rect2df rc1080p = {0, 0, 1920, 1080}; const struct pl_rect2df rc43 = {0, 0, 1024, 768}; struct pl_rect2df rc; REQUIRE(feq(pl_rect2df_aspect(&rc1080p), 16.0/9.0, 1e-8)); REQUIRE(feq(pl_rect2df_aspect(&rc43), 4.0/3.0, 1e-8)); #define pl_rect2df_midx(rc) (((rc).x0 + (rc).x1) / 2.0) #define pl_rect2df_midy(rc) (((rc).y0 + (rc).y1) / 2.0) for (float aspect = 0.2; aspect < 3.0; aspect += 0.4) { for (float scan = 0.0; scan <= 1.0; scan += 0.5) { rc = rc1080p; pl_rect2df_aspect_set(&rc, aspect, scan); printf("aspect %.2f, panscan %.1f: {%f %f} -> {%f %f}\n", aspect, scan, rc.x0, rc.y0, rc.x1, rc.y1); REQUIRE(feq(pl_rect2df_aspect(&rc), aspect, 1e-6)); REQUIRE(feq(pl_rect2df_midx(rc), pl_rect2df_midx(rc1080p), 1e-6)); REQUIRE(feq(pl_rect2df_midy(rc), pl_rect2df_midy(rc1080p), 1e-6)); } } rc = rc1080p; pl_rect2df_aspect_fit(&rc, &rc43, 0.0); REQUIRE(feq(pl_rect2df_aspect(&rc), pl_rect2df_aspect(&rc43), 1e-6)); REQUIRE(feq(pl_rect2df_midx(rc), pl_rect2df_midx(rc1080p), 1e-6)); REQUIRE(feq(pl_rect2df_midy(rc), pl_rect2df_midy(rc1080p), 1e-6)); REQUIRE(feq(pl_rect_w(rc), pl_rect_w(rc43), 1e-6)); REQUIRE(feq(pl_rect_h(rc), pl_rect_h(rc43), 1e-6)); rc = rc43; pl_rect2df_aspect_fit(&rc, &rc1080p, 0.0); REQUIRE(feq(pl_rect2df_aspect(&rc), pl_rect2df_aspect(&rc1080p), 1e-6)); REQUIRE(feq(pl_rect2df_midx(rc), pl_rect2df_midx(rc43), 1e-6)); REQUIRE(feq(pl_rect2df_midy(rc), pl_rect2df_midy(rc43), 1e-6)); REQUIRE(feq(pl_rect_w(rc), pl_rect_w(rc43), 1e-6)); rc = (struct pl_rect2df) { 1920, 1080, 0, 0 }; pl_rect2df_offset(&rc, 50, 100); REQUIRE(feq(rc.x0, 1870, 1e-6)); REQUIRE(feq(rc.x1, -50, 1e-6)); REQUIRE(feq(rc.y0, 980, 1e-6)); REQUIRE(feq(rc.y1, -100, 1e-6)); } 
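// Added illustrative sketch (not part of the upstream test): the aspect-ratio
// loop above relies on pl_rect2df_aspect_set() preserving the rect's midpoint
// while forcing the requested aspect. A minimal standalone check of that
// property, using only helpers already exercised in this file, would be:
//
//     struct pl_rect2df r = {0, 0, 1920, 1080};
//     pl_rect2df_aspect_set(&r, 1.0, 0.0);             // force 1:1, no panscan
//     REQUIRE(feq(pl_rect2df_aspect(&r), 1.0, 1e-6));
//     REQUIRE(feq((r.x0 + r.x1) / 2.0,  960.0, 1e-6)); // midpoint unchanged
//     REQUIRE(feq((r.y0 + r.y1) / 2.0,  540.0, 1e-6));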
libplacebo-v4.192.1/src/tests/d3d11.c000066400000000000000000000025041417677245700171100ustar00rootroot00000000000000#include "gpu_tests.h" #include "d3d11/gpu.h" #include int main() { pl_log log = pl_test_logger(); IDXGIFactory1 *factory = NULL; IDXGIAdapter1 *adapter1 = NULL; HRESULT hr; HMODULE dxgi = LoadLibraryW(L"dxgi.dll"); if (!dxgi) return SKIP; PFN_CREATE_DXGI_FACTORY pCreateDXGIFactory1 = (void *) GetProcAddress(dxgi, "CreateDXGIFactory1"); if (!pCreateDXGIFactory1) return SKIP; pCreateDXGIFactory1(&IID_IDXGIFactory1, (void **) &factory); // Test all attached devices for (int i = 0;; i++) { hr = IDXGIFactory1_EnumAdapters1(factory, i, &adapter1); if (hr == DXGI_ERROR_NOT_FOUND) break; if (FAILED(hr)) { printf("Failed to enumerate adapters\n"); return SKIP; } DXGI_ADAPTER_DESC1 desc; hr = IDXGIAdapter1_GetDesc1(adapter1, &desc); if (FAILED(hr)) { printf("Failed to enumerate adapters\n"); return SKIP; } SAFE_RELEASE(adapter1); const struct pl_d3d11 *d3d11 = pl_d3d11_create(log, pl_d3d11_params( .debug = true, .adapter_luid = desc.AdapterLuid, )); REQUIRE(d3d11); gpu_shader_tests(d3d11->gpu); pl_d3d11_destroy(&d3d11); } SAFE_RELEASE(factory); } libplacebo-v4.192.1/src/tests/dav1d.c000066400000000000000000000030371417677245700172750ustar00rootroot00000000000000#include "tests.h" #include "libplacebo/utils/dav1d.h" int main() { // Test enum functions for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { enum Dav1dMatrixCoefficients mc = pl_system_to_dav1d(sys); enum pl_color_system sys2 = pl_system_from_dav1d(mc); // Exceptions to the rule, due to different handling in dav1d if (sys != PL_COLOR_SYSTEM_BT_2100_HLG && sys != PL_COLOR_SYSTEM_XYZ) REQUIRE(!sys2 || sys2 == sys); } for (enum pl_color_levels lev = 0; lev < PL_COLOR_LEVELS_COUNT; lev++) { int range = pl_levels_to_dav1d(lev); enum pl_color_levels lev2 = pl_levels_from_dav1d(range); if (lev != PL_COLOR_LEVELS_UNKNOWN) REQUIRE(lev2 == lev); } for (enum pl_color_primaries prim = 0; prim < PL_COLOR_PRIM_COUNT; prim++) { enum Dav1dColorPrimaries dpri = pl_primaries_to_dav1d(prim); enum pl_color_primaries prim2 = pl_primaries_from_dav1d(dpri); REQUIRE(!prim2 || prim2 == prim); } for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { enum Dav1dTransferCharacteristics dtrc = pl_transfer_to_dav1d(trc); enum pl_color_transfer trc2 = pl_transfer_from_dav1d(dtrc); REQUIRE(!trc2 || trc2 == trc); } for (enum pl_chroma_location loc = 0; loc < PL_CHROMA_COUNT; loc++) { enum Dav1dChromaSamplePosition dloc = pl_chroma_to_dav1d(loc); enum pl_chroma_location loc2 = pl_chroma_from_dav1d(dloc); REQUIRE(!loc2 || loc2 == loc); } } libplacebo-v4.192.1/src/tests/dither.c000066400000000000000000000020141417677245700175470ustar00rootroot00000000000000#include "tests.h" #define SHIFT 4 #define SIZE (1 << SHIFT) float data[SIZE][SIZE]; int main() { printf("Ordered dither matrix:\n"); pl_generate_bayer_matrix(&data[0][0], SIZE); for (int y = 0; y < SIZE; y++) { for (int x = 0; x < SIZE; x++) printf(" %3d", (int)(data[y][x] * SIZE * SIZE)); printf("\n"); } printf("Blue noise dither matrix:\n"); pl_generate_blue_noise(&data[0][0], SHIFT); for (int y = 0; y < SIZE; y++) { for (int x = 0; x < SIZE; x++) printf(" %3d", (int)(data[y][x] * SIZE * SIZE)); printf("\n"); } // Generate an example of a dither shader pl_log log = pl_test_logger(); pl_shader sh = pl_shader_alloc(log, NULL); pl_shader_obj obj = NULL; pl_shader_dither(sh, 8, &obj, NULL); const struct pl_shader_res *res = pl_shader_finalize(sh); REQUIRE(res); 
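    // Added comment: pl_shader_finalize() returns NULL if the shader could not
    // be generated, so the REQUIRE above guards the dereference below;
    // res->glsl holds the generated GLSL source for the dither pass, which is
    // dumped purely for inspection.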
printf("Generated dither shader:\n%s\n", res->glsl); pl_shader_obj_destroy(&obj); pl_shader_free(&sh); pl_log_destroy(&log); } libplacebo-v4.192.1/src/tests/dummy.c000066400000000000000000000037301417677245700174310ustar00rootroot00000000000000#include "gpu_tests.h" int main() { pl_log log = pl_test_logger(); pl_gpu gpu = pl_gpu_dummy_create(log, NULL); pl_buffer_tests(gpu); pl_texture_tests(gpu); // Attempt creating a shader and accessing the resulting LUT pl_tex dummy = pl_tex_dummy_create(gpu, pl_tex_dummy_params( .w = 100, .h = 100, .format = pl_find_named_fmt(gpu, "rgba8"), )); struct pl_sample_src src = { .tex = dummy, .new_w = 1000, .new_h = 1000, }; pl_shader_obj lut = NULL; struct pl_sample_filter_params filter_params = { .filter = pl_filter_ewa_lanczos, .lut = &lut, }; pl_shader sh = pl_shader_alloc(log, pl_shader_params( .gpu = gpu )); REQUIRE(pl_shader_sample_polar(sh, &src, &filter_params)); const struct pl_shader_res *res = pl_shader_finalize(sh); REQUIRE(res); for (int n = 0; n < res->num_descriptors; n++) { const struct pl_shader_desc *sd = &res->descriptors[n]; if (sd->desc.type != PL_DESC_SAMPLED_TEX) continue; pl_tex tex = sd->binding.object; const float *data = (float *) pl_tex_dummy_data(tex); if (!data) continue; // means this was the `dummy` texture for (int i = 0; i < tex->params.w; i++) printf("lut[%d] = %f\n", i, data[i]); } // Try out generation of the sampler2D interface src.tex = NULL; src.tex_w = 100; src.tex_h = 100; src.format = PL_FMT_UNORM; src.sampler = PL_SAMPLER_NORMAL; src.mode = PL_TEX_SAMPLE_LINEAR; pl_shader_reset(sh, pl_shader_params( .gpu = gpu )); REQUIRE(pl_shader_sample_polar(sh, &src, &filter_params)); REQUIRE((res = pl_shader_finalize(sh))); REQUIRE(res->input == PL_SHADER_SIG_SAMPLER); printf("generated sampler2D shader:\n\n%s\n", res->glsl); pl_shader_free(&sh); pl_shader_obj_destroy(&lut); pl_tex_destroy(gpu, &dummy); pl_gpu_dummy_destroy(&gpu); pl_log_destroy(&log); } libplacebo-v4.192.1/src/tests/filters.c000066400000000000000000000023631417677245700177470ustar00rootroot00000000000000#include "tests.h" int main() { pl_log log = pl_test_logger(); for (const struct pl_filter_preset *conf = pl_filter_presets; conf->name; conf++) { if (!conf->filter) continue; struct pl_filter_params params = { .config = *conf->filter, .lut_entries = 128, }; printf("Testing filter '%s'\n", conf->name); pl_filter flt = pl_filter_generate(log, ¶ms); REQUIRE(flt); if (params.config.polar) { // Ensure the kernel seems sanely scaled REQUIRE(feq(flt->weights[0], 1.0, 1e-7)); REQUIRE(feq(flt->weights[params.lut_entries - 1], 0.0, 1e-7)); } else { // Ensure the weights for each row add up to unity for (int i = 0; i < params.lut_entries; i++) { float sum = 0.0; REQUIRE(flt->row_size); REQUIRE(flt->row_stride >= flt->row_size); for (int n = 0; n < flt->row_size; n++) { float w = flt->weights[i * flt->row_stride + n]; sum += w; } REQUIRE(feq(sum, 1.0, 1e-6)); } } pl_filter_free(&flt); } pl_log_destroy(&log); } libplacebo-v4.192.1/src/tests/fuzz/000077500000000000000000000000001417677245700171255ustar00rootroot00000000000000libplacebo-v4.192.1/src/tests/fuzz/lut.c000066400000000000000000000006161417677245700201000ustar00rootroot00000000000000#include "../tests.h" __AFL_FUZZ_INIT(); #pragma clang optimize off int main() { struct pl_custom_lut *lut; #ifdef __AFL_HAVE_MANUAL_CONTROL __AFL_INIT(); #endif unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; while (__AFL_LOOP(100000)) { size_t len = __AFL_FUZZ_TESTCASE_LEN; lut = pl_lut_parse_cube(NULL, (char *) buf, len); 
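        // Added comment: on arbitrary fuzzer input pl_lut_parse_cube() is
        // expected to return NULL rather than crash; pl_lut_free() below is
        // assumed to tolerate a NULL result, so the fuzz loop needs no
        // explicit success check.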
pl_lut_free(&lut); } } libplacebo-v4.192.1/src/tests/fuzz/shaders.c000066400000000000000000000107541417677245700207310ustar00rootroot00000000000000#include "../tests.h" #include "shaders.h" __AFL_FUZZ_INIT(); #pragma clang optimize off int main() { pl_gpu gpu = pl_gpu_dummy_create(NULL, NULL); #define WIDTH 64 #define HEIGHT 64 #define COMPS 4 static const float empty[HEIGHT][WIDTH][COMPS] = {0}; struct pl_sample_src src = { .tex = pl_tex_create(gpu, pl_tex_params( .format = pl_find_fmt(gpu, PL_FMT_FLOAT, COMPS, 0, 32, PL_FMT_CAP_SAMPLEABLE), .initial_data = empty, .sampleable = true, .w = WIDTH, .h = HEIGHT, )), .new_w = WIDTH * 2, .new_h = HEIGHT * 2, }; if (!src.tex) return 1; #ifdef __AFL_HAVE_MANUAL_CONTROL __AFL_INIT(); #endif unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; while (__AFL_LOOP(10000)) { #define STACK_SIZE 16 pl_shader stack[STACK_SIZE] = {0}; int idx = 0; stack[0] = pl_shader_alloc(NULL, pl_shader_params( .gpu = gpu, )); pl_shader sh = stack[idx]; pl_shader_obj polar = NULL, ortho = NULL, peak = NULL, dither = NULL; size_t len = __AFL_FUZZ_TESTCASE_LEN; for (size_t pos = 0; pos < len; pos++) { switch (buf[pos]) { // Sampling steps case 'S': pl_shader_sample_direct(sh, &src); break; case 'D': pl_shader_deband(sh, &src, NULL); break; case 'P': pl_shader_sample_polar(sh, &src, pl_sample_filter_params( .filter = pl_filter_ewa_lanczos, .lut = &polar, )); case 'O': pl_shader_sample_ortho(sh, PL_SEP_VERT, &src, pl_sample_filter_params( .filter = pl_filter_spline36, .lut = &ortho, )); break; case 'X': pl_shader_custom(sh, &(struct pl_custom_shader) { .input = PL_SHADER_SIG_NONE, .output = PL_SHADER_SIG_COLOR, .body = "// merge subpasses", }); break; // Colorspace transformation steps case 'y': { struct pl_color_repr repr = pl_color_repr_jpeg; pl_shader_decode_color(sh, &repr, NULL); break; } case 'p': pl_shader_detect_peak(sh, pl_color_space_hdr10, &peak, NULL); break; case 'm': pl_shader_color_map(sh, NULL, pl_color_space_bt709, pl_color_space_monitor, NULL, false); break; case 't': pl_shader_color_map(sh, NULL, pl_color_space_hdr10, pl_color_space_monitor, &peak, false); break; case 'd': pl_shader_dither(sh, 8, &dither, pl_dither_params( // Picked to speed up calculation .method = PL_DITHER_ORDERED_LUT, .lut_size = 2, )); break; // Push and pop subshader commands case '(': if (idx+1 == STACK_SIZE) goto invalid; idx++; if (!stack[idx]) { stack[idx] = pl_shader_alloc(NULL, pl_shader_params( .gpu = gpu, .id = idx, )); } sh = stack[idx]; break; case ')': if (idx == 0) goto invalid; idx--; sh_subpass(stack[idx], stack[idx + 1]); pl_shader_reset(stack[idx + 1], pl_shader_params( .gpu = gpu, .id = idx + 1, )); sh = stack[idx]; break; default: goto invalid; } } // Merge remaining shaders while (idx > 0) { sh_subpass(stack[idx - 1], stack[idx]); idx--; } pl_shader_finalize(stack[0]); invalid: for (int i = 0; i < STACK_SIZE; i++) pl_shader_free(&stack[i]); pl_shader_obj_destroy(&polar); pl_shader_obj_destroy(&ortho); pl_shader_obj_destroy(&peak); pl_shader_obj_destroy(&dither); } pl_tex_destroy(gpu, &src.tex); pl_gpu_dummy_destroy(&gpu); } libplacebo-v4.192.1/src/tests/fuzz/user_shaders.c000066400000000000000000000007711417677245700217650ustar00rootroot00000000000000#include "../tests.h" __AFL_FUZZ_INIT(); #pragma clang optimize off int main() { pl_gpu gpu = pl_gpu_dummy_create(NULL, NULL); const struct pl_hook *hook; #ifdef __AFL_HAVE_MANUAL_CONTROL __AFL_INIT(); #endif unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; while (__AFL_LOOP(100000)) { size_t len = __AFL_FUZZ_TESTCASE_LEN; hook 
= pl_mpv_user_shader_parse(gpu, (char *) buf, len); pl_mpv_user_shader_destroy(&hook); } pl_gpu_dummy_destroy(&gpu); } libplacebo-v4.192.1/src/tests/gpu_tests.h000066400000000000000000001444301417677245700203230ustar00rootroot00000000000000#include "tests.h" #include "shaders.h" static void pl_buffer_tests(pl_gpu gpu) { const size_t buf_size = 1024; if (buf_size > gpu->limits.max_buf_size) return; uint8_t *test_src = malloc(buf_size * 2); uint8_t *test_dst = test_src + buf_size; assert(test_src && test_dst); memset(test_dst, 0, buf_size); for (int i = 0; i < buf_size; i++) test_src[i] = (RANDOM * 256); pl_buf buf = NULL, tbuf = NULL; printf("test buffer static creation and readback\n"); buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = buf_size, .host_readable = true, .initial_data = test_src, }); REQUIRE(buf); REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size)); REQUIRE(memcmp(test_src, test_dst, buf_size) == 0); pl_buf_destroy(gpu, &buf); printf("test buffer empty creation, update and readback\n"); memset(test_dst, 0, buf_size); buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = buf_size, .host_writable = true, .host_readable = true, }); REQUIRE(buf); pl_buf_write(gpu, buf, 0, test_src, buf_size); REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size)); REQUIRE(memcmp(test_src, test_dst, buf_size) == 0); pl_buf_destroy(gpu, &buf); printf("test buffer-buffer copy and readback\n"); memset(test_dst, 0, buf_size); buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = buf_size, .initial_data = test_src, }); tbuf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = buf_size, .host_readable = true, }); REQUIRE(buf && tbuf); pl_buf_copy(gpu, tbuf, 0, buf, 0, buf_size); REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size)); REQUIRE(memcmp(test_src, test_dst, buf_size) == 0); pl_buf_destroy(gpu, &buf); pl_buf_destroy(gpu, &tbuf); if (buf_size <= gpu->limits.max_mapped_size) { printf("test host mapped buffer readback\n"); buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = buf_size, .host_mapped = true, .initial_data = test_src, }); REQUIRE(buf); REQUIRE(!pl_buf_poll(gpu, buf, 0)); REQUIRE(memcmp(test_src, buf->data, buf_size) == 0); pl_buf_destroy(gpu, &buf); } free(test_src); } static void test_cb(void *priv) { bool *flag = priv; *flag = true; } static void pl_test_roundtrip(pl_gpu gpu, pl_tex tex[2], uint8_t *src, uint8_t *dst) { if (!tex[0] || !tex[1]) { printf("failed creating test textures... skipping this test\n"); return; } int texels = tex[0]->params.w; texels *= tex[0]->params.h ? tex[0]->params.h : 1; texels *= tex[0]->params.d ? tex[0]->params.d : 1; pl_fmt fmt = tex[0]->params.format; size_t bytes = texels * fmt->texel_size; memset(src, 0, bytes); memset(dst, 0, bytes); for (size_t i = 0; i < bytes; i++) src[i] = (RANDOM * 256); pl_timer ul, dl; ul = pl_timer_create(gpu); dl = pl_timer_create(gpu); bool ran_ul = false, ran_dl = false; REQUIRE(pl_tex_upload(gpu, &(struct pl_tex_transfer_params){ .tex = tex[0], .ptr = src, .timer = ul, .callback = gpu->limits.callbacks ? test_cb : NULL, .priv = &ran_ul, })); // Test blitting, if possible for this format pl_tex dst_tex = tex[0]; if (tex[0]->params.blit_src && tex[1]->params.blit_dst) { pl_tex_clear_ex(gpu, tex[1], (union pl_clear_color){0}); // for testing pl_tex_blit(gpu, &(struct pl_tex_blit_params) { .src = tex[0], .dst = tex[1], }); dst_tex = tex[1]; } REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params){ .tex = dst_tex, .ptr = dst, .timer = dl, .callback = gpu->limits.callbacks ? 
test_cb : NULL, .priv = &ran_dl, })); pl_gpu_finish(gpu); if (gpu->limits.callbacks) REQUIRE(ran_ul && ran_dl); if (fmt->emulated && fmt->type == PL_FMT_FLOAT) { // TODO: can't memcmp here because bits might be lost due to the // emulated 16/32 bit upload paths, figure out a better way to // generate data and verify the roundtrip! } else { REQUIRE(memcmp(src, dst, bytes) == 0); } // Report timer results printf("upload time: %"PRIu64", download time: %"PRIu64"\n", pl_timer_query(gpu, ul), pl_timer_query(gpu, dl)); pl_timer_destroy(gpu, &ul); pl_timer_destroy(gpu, &dl); } static void pl_texture_tests(pl_gpu gpu) { const size_t max_size = 16*16*16 * 4 *sizeof(double); uint8_t *test_src = malloc(max_size * 2); uint8_t *test_dst = test_src + max_size; for (int f = 0; f < gpu->num_formats; f++) { pl_fmt fmt = gpu->formats[f]; if (fmt->opaque || !(fmt->caps & PL_FMT_CAP_HOST_READABLE)) continue; printf("testing texture roundtrip for format %s\n", fmt->name); assert(fmt->texel_size <= 4 * sizeof(double)); struct pl_tex_params ref_params = { .format = fmt, .blit_src = (fmt->caps & PL_FMT_CAP_BLITTABLE), .blit_dst = (fmt->caps & PL_FMT_CAP_BLITTABLE), .host_writable = true, .host_readable = true, .debug_tag = PL_DEBUG_TAG, }; pl_tex tex[2]; if (gpu->limits.max_tex_1d_dim >= 16) { printf("... 1D\n"); struct pl_tex_params params = ref_params; params.w = 16; if (!gpu->limits.blittable_1d_3d) params.blit_src = params.blit_dst = false; for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) tex[i] = pl_tex_create(gpu, ¶ms); pl_test_roundtrip(gpu, tex, test_src, test_dst); for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) pl_tex_destroy(gpu, &tex[i]); } if (gpu->limits.max_tex_2d_dim >= 16) { printf("... 2D\n"); struct pl_tex_params params = ref_params; params.w = params.h = 16; for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) tex[i] = pl_tex_create(gpu, ¶ms); pl_test_roundtrip(gpu, tex, test_src, test_dst); for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) pl_tex_destroy(gpu, &tex[i]); } if (gpu->limits.max_tex_3d_dim >= 16) { printf("... 
3D\n"); struct pl_tex_params params = ref_params; params.w = params.h = params.d = 16; if (!gpu->limits.blittable_1d_3d) params.blit_src = params.blit_dst = false; for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) tex[i] = pl_tex_create(gpu, ¶ms); pl_test_roundtrip(gpu, tex, test_src, test_dst); for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) pl_tex_destroy(gpu, &tex[i]); } } free(test_src); } static void pl_shader_tests(pl_gpu gpu) { if (gpu->glsl.version < 410) return; const char *vert_shader = "#version 410 \n" "layout(location=0) in vec2 vertex_pos; \n" "layout(location=1) in vec3 vertex_color; \n" "layout(location=0) out vec3 frag_color; \n" "void main() { \n" " gl_Position = vec4(vertex_pos, 0, 1); \n" " frag_color = vertex_color; \n" "}"; const char *frag_shader = "#version 410 \n" "layout(location=0) in vec3 frag_color; \n" "layout(location=0) out vec4 out_color; \n" "void main() { \n" " out_color = vec4(frag_color, 1.0); \n" "}"; pl_fmt fbo_fmt; enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE | PL_FMT_CAP_LINEAR; fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, caps); if (!fbo_fmt) return; #define FBO_W 16 #define FBO_H 16 pl_tex fbo; fbo = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fbo_fmt, .w = FBO_W, .h = FBO_H, .renderable = true, .storable = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE), .host_readable = true, .blit_dst = true, }); REQUIRE(fbo); pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0}); pl_fmt vert_fmt; vert_fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3); REQUIRE(vert_fmt); static const struct vertex { float pos[2]; float color[3]; } vertices[] = { {{-1.0, -1.0}, {0, 0, 0}}, {{ 1.0, -1.0}, {1, 0, 0}}, {{-1.0, 1.0}, {0, 1, 0}}, {{ 1.0, 1.0}, {1, 1, 0}}, }; pl_pass pass; pass = pl_pass_create(gpu, &(struct pl_pass_params) { .type = PL_PASS_RASTER, .target_format = fbo_fmt, .vertex_shader = vert_shader, .glsl_shader = frag_shader, .vertex_type = PL_PRIM_TRIANGLE_STRIP, .vertex_stride = sizeof(struct vertex), .num_vertex_attribs = 2, .vertex_attribs = (struct pl_vertex_attrib[]) {{ .name = "vertex_pos", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), .location = 0, .offset = offsetof(struct vertex, pos), }, { .name = "vertex_color", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3), .location = 1, .offset = offsetof(struct vertex, color), }}, }); REQUIRE(pass); if (pass->params.cached_program || pass->params.cached_program_len) { // Ensure both are set if either one is set REQUIRE(pass->params.cached_program); REQUIRE(pass->params.cached_program_len); } pl_timer timer = pl_timer_create(gpu); pl_pass_run(gpu, &(struct pl_pass_run_params) { .pass = pass, .target = fbo, .vertex_count = PL_ARRAY_SIZE(vertices), .vertex_data = vertices, .timer = timer, }); // Wait until this pass is complete and report the timer result pl_gpu_finish(gpu); printf("timer query result: %"PRIu64"\n", pl_timer_query(gpu, timer)); pl_timer_destroy(gpu, &timer); static float data[FBO_H * FBO_W * 4] = {0}; // Test against the known pattern of `src`, only useful for roundtrip tests #define TEST_FBO_PATTERN(eps, fmt, ...) 
\ do { \ printf("testing pattern of " fmt "\n", __VA_ARGS__); \ REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { \ .tex = fbo, \ .ptr = data, \ })); \ \ for (int y = 0; y < FBO_H; y++) { \ for (int x = 0; x < FBO_W; x++) { \ float *color = &data[(y * FBO_W + x) * 4]; \ REQUIRE(feq(color[0], (x + 0.5) / FBO_W, eps)); \ REQUIRE(feq(color[1], (y + 0.5) / FBO_H, eps)); \ REQUIRE(feq(color[2], 0.0, eps)); \ REQUIRE(feq(color[3], 1.0, eps)); \ } \ } \ } while (0) TEST_FBO_PATTERN(1e-6, "%s", "initial rendering"); if (sizeof(vertices) <= gpu->limits.max_vbo_size) { // Test the use of an explicit vertex buffer pl_buf vert = pl_buf_create(gpu, &(struct pl_buf_params) { .size = sizeof(vertices), .initial_data = vertices, .drawable = true, }); REQUIRE(vert); pl_pass_run(gpu, &(struct pl_pass_run_params) { .pass = pass, .target = fbo, .vertex_count = sizeof(vertices) / sizeof(struct vertex), .vertex_buf = vert, .buf_offset = 0, }); pl_buf_destroy(gpu, &vert); TEST_FBO_PATTERN(1e-6, "%s", "using vertex buffer"); } // Test the use of index buffers static const uint16_t indices[] = { 3, 2, 1, 0 }; pl_pass_run(gpu, &(struct pl_pass_run_params) { .pass = pass, .target = fbo, .vertex_count = PL_ARRAY_SIZE(indices), .vertex_data = vertices, .index_data = indices, }); pl_pass_destroy(gpu, &pass); TEST_FBO_PATTERN(1e-6, "%s", "using indexed rendering"); // Test the use of pl_dispatch pl_dispatch dp = pl_dispatch_create(gpu->log, gpu); pl_shader sh = pl_dispatch_begin(dp); REQUIRE(pl_shader_custom(sh, &(struct pl_custom_shader) { .body = "color = vec4(col, 1.0);", .input = PL_SHADER_SIG_NONE, .output = PL_SHADER_SIG_COLOR, })); REQUIRE(pl_dispatch_vertex(dp, &(struct pl_dispatch_vertex_params) { .shader = &sh, .target = fbo, .vertex_stride = sizeof(struct vertex), .vertex_position_idx = 0, .num_vertex_attribs = 2, .vertex_attribs = (struct pl_vertex_attrib[]) {{ .name = "pos", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), .offset = offsetof(struct vertex, pos), }, { .name = "col", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3), .offset = offsetof(struct vertex, color), }}, .vertex_type = PL_PRIM_TRIANGLE_STRIP, .vertex_coords = PL_COORDS_NORMALIZED, .vertex_count = PL_ARRAY_SIZE(vertices), .vertex_data = vertices, })); TEST_FBO_PATTERN(1e-6, "%s", "using custom vertices"); pl_tex src; src = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fbo_fmt, .w = FBO_W, .h = FBO_H, .storable = fbo->params.storable, .sampleable = true, .initial_data = data, }); if (fbo->params.storable) { // Test 1x1 blit, to make sure the scaling code runs REQUIRE(pl_tex_blit_compute(gpu, dp, &(struct pl_tex_blit_params) { .src = src, .dst = fbo, .src_rc = {0, 0, 0, 1, 1, 1}, .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1}, .sample_mode = PL_TEX_SAMPLE_NEAREST, })); // Test non-resizing blit, which uses the efficient imageLoad path REQUIRE(pl_tex_blit_compute(gpu, dp, &(struct pl_tex_blit_params) { .src = src, .dst = fbo, .src_rc = {0, 0, 0, FBO_W, FBO_H, 1}, .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1}, .sample_mode = PL_TEX_SAMPLE_NEAREST, })); TEST_FBO_PATTERN(1e-6, "%s", "pl_tex_blit_compute"); } // Test encoding/decoding of all gamma functions, color spaces, etc. 
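    // Added note: each iteration below applies pl_shader_delinearize() followed
    // by pl_shader_linearize() with the same transfer function, so the FBO
    // should still hold the original test pattern; the HDR transfer functions
    // get a looser epsilon (1e-4 vs 1e-6) when re-checked via TEST_FBO_PATTERN.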
for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { sh = pl_dispatch_begin(dp); pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); pl_shader_delinearize(sh, pl_color_space( .transfer = trc )); pl_shader_linearize(sh, pl_color_space( .transfer = trc )); REQUIRE(pl_dispatch_finish(dp, pl_dispatch_params( .shader = &sh, .target = fbo, ))); float epsilon = pl_color_transfer_is_hdr(trc) ? 1e-4 : 1e-6; TEST_FBO_PATTERN(epsilon, "transfer function %d", (int) trc); } for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { if (sys == PL_COLOR_SYSTEM_DOLBYVISION) continue; // requires metadata sh = pl_dispatch_begin(dp); pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); pl_shader_encode_color(sh, &(struct pl_color_repr) { .sys = sys }); pl_shader_decode_color(sh, &(struct pl_color_repr) { .sys = sys }, NULL); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); float epsilon; switch (sys) { case PL_COLOR_SYSTEM_BT_2020_C: epsilon = 1e-5; break; case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: // These seem to be horrifically noisy and prone to breaking on // edge cases for some reason // TODO: figure out why! continue; default: epsilon = 1e-6; break; } TEST_FBO_PATTERN(epsilon, "color system %d", (int) sys); } // Repeat this a few times to test the caching for (int i = 0; i < 10; i++) { if (i == 5) { printf("Recreating pl_dispatch to test the caching\n"); size_t size = pl_dispatch_save(dp, NULL); REQUIRE(size > 0); uint8_t *cache = malloc(size); REQUIRE(cache); REQUIRE(pl_dispatch_save(dp, cache) == size); pl_dispatch_destroy(&dp); dp = pl_dispatch_create(gpu->log, gpu); pl_dispatch_load(dp, cache); #ifndef MSAN // Test to make sure the pass regenerates the same cache, but skip // this on MSAN because it doesn't like it when we read from // program cache data generated by the non-instrumented GPU driver uint64_t hash = pl_str_hash((pl_str) { cache, size }); REQUIRE(pl_dispatch_save(dp, NULL) == size); REQUIRE(pl_dispatch_save(dp, cache) == size); REQUIRE(pl_str_hash((pl_str) { cache, size }) == hash); #endif free(cache); } sh = pl_dispatch_begin(dp); // For testing, force the use of CS if possible if (gpu->glsl.compute) { sh->type = SH_COMPUTE; sh->res.compute_group_size[0] = 8; sh->res.compute_group_size[1] = 8; } pl_shader_deband(sh, pl_sample_src( .tex = src ), pl_deband_params( .iterations = 0, .grain = 0.0, )); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); TEST_FBO_PATTERN(1e-6, "deband iter %d", i); } // Test peak detection and readback if possible sh = pl_dispatch_begin(dp); pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); pl_shader_obj peak_state = NULL; struct pl_color_space csp_gamma22 = { .transfer = PL_COLOR_TRC_GAMMA22 }; struct pl_peak_detect_params peak_params = { .minimum_peak = 0.01 }; if (pl_shader_detect_peak(sh, csp_gamma22, &peak_state, &peak_params)) { REQUIRE(pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) { .shader = &sh, .width = fbo->params.w, .height = fbo->params.h, })); float peak, avg; REQUIRE(pl_get_detected_peak(peak_state, &peak, &avg)); printf("detected peak: %f, average: %f\n", peak, avg); float real_peak = 0, real_avg = 0; for (int y = 0; y < FBO_H; y++) { for (int x = 0; x < FBO_W; x++) { float *color = &data[(y * FBO_W + x) * 4]; float smax = powf(PL_MAX(color[0], color[1]), 2.2); smax = (1 - 1e-3f) * smax + 1e-3f; float slog = logf(PL_MAX(smax, 0.001)); real_peak = 
PL_MAX(smax, real_peak); real_avg += slog; } } real_avg = expf(real_avg / (FBO_W * FBO_H)); printf("real peak: %f, real average: %f\n", real_peak, real_avg); REQUIRE(feq(peak, real_peak, 1e-4)); REQUIRE(feq(avg, real_avg, 1e-3)); } pl_dispatch_abort(dp, &sh); pl_shader_obj_destroy(&peak_state); #ifdef PL_HAVE_LCMS // Test the use of ICC profiles if available sh = pl_dispatch_begin(dp); pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); pl_shader_obj icc = NULL; struct pl_icc_color_space src_color = { .color = pl_color_space_bt709 }; struct pl_icc_color_space dst_color = { .color = pl_color_space_srgb }; struct pl_icc_result out; if (pl_icc_update(sh, &src_color, &dst_color, &icc, &out, NULL)) { pl_icc_apply(sh, &icc); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); } pl_dispatch_abort(dp, &sh); pl_shader_obj_destroy(&icc); #endif // Test film grain synthesis pl_shader_obj grain = NULL; struct pl_film_grain_params grain_params = { .tex = src, .components = 3, .component_mapping = { 0, 1, 2}, .repr = &(struct pl_color_repr) { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_LIMITED, .bits = { .color_depth = 10, .sample_depth = 10 }, }, }; for (int i = 0; i < 2; i++) { grain_params.data.type = PL_FILM_GRAIN_AV1; grain_params.data.params.av1 = av1_grain_data; grain_params.data.params.av1.overlap = !!i; grain_params.data.seed = rand(); sh = pl_dispatch_begin(dp); pl_shader_film_grain(sh, &grain, &grain_params); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); } grain_params.data.type = PL_FILM_GRAIN_H274; grain_params.data.params.h274 = h274_grain_data; grain_params.data.seed = rand(); sh = pl_dispatch_begin(dp); pl_shader_film_grain(sh, &grain, &grain_params); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); pl_shader_obj_destroy(&grain); // Test custom shaders struct pl_custom_shader custom = { .header = "vec3 invert(vec3 color) \n" "{ \n" " return vec3(1.0) - color; \n" "} \n", .body = "color = vec4(gl_FragCoord.xy, 0.0, 1.0); \n" "color.rgb = invert(color.rgb) + offset; \n", .input = PL_SHADER_SIG_NONE, .output = PL_SHADER_SIG_COLOR, .num_variables = 1, .variables = &(struct pl_shader_var) { .var = pl_var_float("offset"), .data = &(float) { 0.1 }, }, }; sh = pl_dispatch_begin(dp); REQUIRE(pl_shader_custom(sh, &custom)); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); // Test dolbyvision if (gpu->glsl.version >= 130) { struct pl_color_repr repr = { .sys = PL_COLOR_SYSTEM_DOLBYVISION, .dovi = &dovi_meta, }; sh = pl_dispatch_begin(dp); pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_decode_color(sh, &repr, NULL); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); } pl_dispatch_destroy(&dp); pl_tex_destroy(gpu, &src); pl_tex_destroy(gpu, &fbo); } static void pl_scaler_tests(pl_gpu gpu) { pl_fmt src_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_LINEAR); pl_fmt fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_RENDERABLE); if (!src_fmt || !fbo_fmt) return; float *fbo_data = NULL; pl_shader_obj lut = NULL; static float data_5x5[5][5] = { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 1, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, }; pl_tex dot5x5 = pl_tex_create(gpu, &(struct pl_tex_params) { .w = 5, .h = 5, .format = src_fmt, .sampleable = true, .initial_data = &data_5x5[0][0], }); struct 
pl_tex_params fbo_params = { .w = 100, .h = 100, .format = fbo_fmt, .renderable = true, .storable = fbo_fmt->caps & PL_FMT_CAP_STORABLE, .host_readable = fbo_fmt->caps & PL_FMT_CAP_HOST_READABLE, }; pl_tex fbo = pl_tex_create(gpu, &fbo_params); pl_dispatch dp = pl_dispatch_create(gpu->log, gpu); if (!dot5x5 || !fbo || !dp) goto error; pl_shader sh = pl_dispatch_begin(dp); REQUIRE(pl_shader_sample_polar(sh, pl_sample_src( .tex = dot5x5, .new_w = fbo->params.w, .new_h = fbo->params.h, ), pl_sample_filter_params( .filter = pl_filter_ewa_lanczos, .lut = &lut, .no_compute = !fbo->params.storable, ) )); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); if (fbo->params.host_readable) { fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float)); REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { .tex = fbo, .ptr = fbo_data, })); int max = 255; printf("P2\n%d %d\n%d\n", fbo->params.w, fbo->params.h, max); for (int y = 0; y < fbo->params.h; y++) { for (int x = 0; x < fbo->params.w; x++) { float v = fbo_data[y * fbo->params.h + x]; printf("%d ", (int) round(fmin(fmax(v, 0.0), 1.0) * max)); } printf("\n"); } } error: free(fbo_data); pl_shader_obj_destroy(&lut); pl_dispatch_destroy(&dp); pl_tex_destroy(gpu, &dot5x5); pl_tex_destroy(gpu, &fbo); } static const char *user_shader_tests[] = { // Test hooking, saving and loading "// Example of a comment at the beginning \n" " \n" "//!HOOK NATIVE \n" "//!DESC upscale image \n" "//!BIND HOOKED \n" "//!WIDTH HOOKED.w 10 * \n" "//!HEIGHT HOOKED.h 10 * \n" "//!SAVE NATIVEBIG \n" "//!WHEN NATIVE.w 500 < \n" " \n" "vec4 hook() \n" "{ \n" " return HOOKED_texOff(0); \n" "} \n" " \n" "//!HOOK MAIN \n" "//!DESC downscale bigger image \n" "//!WHEN NATIVE.w 500 < \n" "//!BIND NATIVEBIG \n" " \n" "vec4 hook() \n" "{ \n" " return NATIVEBIG_texOff(0); \n" "} \n", // Test use of textures "//!HOOK MAIN \n" "//!DESC turn everything into colorful pixels \n" "//!BIND HOOKED \n" "//!BIND DISCO \n" "//!COMPONENTS 3 \n" " \n" "vec4 hook() \n" "{ \n" " return vec4(DISCO_tex(HOOKED_pos * 10.0).rgb, 1); \n" "} \n" " \n" "//!TEXTURE DISCO \n" "//!SIZE 3 3 \n" "//!FORMAT rgba32f \n" "//!FILTER NEAREST \n" "//!BORDER REPEAT \n" "0000803f000000000000000000000000000000000000803f0000000000000000000000000" "00000000000803f00000000000000000000803f0000803f000000000000803f0000000000" "00803f000000000000803f0000803f00000000000000009a99993e9a99993e9a99993e000" "000009a99193F9A99193f9a99193f000000000000803f0000803f0000803f00000000 \n", // Test use of storage/buffer resources "//!HOOK MAIN \n" "//!DESC attach some storage objects \n" "//!BIND tex_storage \n" "//!BIND buf_uniform \n" "//!BIND buf_storage \n" "//!COMPONENTS 4 \n" " \n" "vec4 hook() \n" "{ \n" " return vec4(foo, bar, bat); \n" "} \n" " \n" "//!TEXTURE tex_storage \n" "//!SIZE 100 100 \n" "//!FORMAT r32f \n" "//!STORAGE \n" " \n" "//!BUFFER buf_uniform \n" "//!VAR float foo \n" "//!VAR float bar \n" "0000000000000000 \n" " \n" "//!BUFFER buf_storage \n" "//!VAR vec2 bat \n" "//!VAR int big[32]; \n" "//!STORAGE \n" }; static const char *test_luts[] = { "TITLE \"1D identity\" \n" "LUT_1D_SIZE 2 \n" "0.0 0.0 0.0 \n" "1.0 1.0 1.0 \n", "TITLE \"3D identity\" \n" "LUT_3D_SIZE 2 \n" "0.0 0.0 0.0 \n" "1.0 0.0 0.0 \n" "0.0 1.0 0.0 \n" "1.0 1.0 0.0 \n" "0.0 0.0 1.0 \n" "1.0 0.0 1.0 \n" "0.0 1.0 1.0 \n" "1.0 1.0 1.0 \n" }; static bool frame_passthrough(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src, struct pl_frame *out_frame) { const struct pl_frame *frame = 
src->frame_data; *out_frame = *frame; return true; } static enum pl_queue_status get_frame_ptr(struct pl_source_frame *out_frame, const struct pl_queue_params *qparams) { const struct pl_source_frame **pframe = qparams->priv; if (!(*pframe)->frame_data) return PL_QUEUE_EOF; *out_frame = *(*pframe)++; return PL_QUEUE_OK; } static void render_info_cb(void *priv, const struct pl_render_info *info) { printf("{%d} Executed shader: %s\n", info->index, info->pass->shader->description); } static void pl_render_tests(pl_gpu gpu) { pl_tex img5x5_tex = NULL, fbo = NULL; pl_renderer rr = NULL; static float data_5x5[5][5] = { { 0.0, 0.0, 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 0.0, 0.0 }, { 1.0, 0.0, 0.5, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 1.0, 0.0 }, { 0.0, 0.3, 0.0, 0.0, 0.0 }, }; const int width = 5, height = 5; struct pl_plane img5x5 = {0}; struct pl_plane_data img5x5_data = { .type = PL_FMT_FLOAT, .width = width, .height = height, .component_size = { 8 * sizeof(float) }, .component_map = { 0 }, .pixel_stride = sizeof(float), .pixels = &data_5x5, }; if (!pl_recreate_plane(gpu, NULL, &fbo, &img5x5_data)) return; if (!pl_upload_plane(gpu, &img5x5, &img5x5_tex, &img5x5_data)) goto error; rr = pl_renderer_create(gpu->log, gpu); pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0}); struct pl_frame image = { .num_planes = 1, .planes = { img5x5 }, .repr = { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_FULL, }, .color = pl_color_space_srgb, .crop = {-1.0, 0.0, width - 1.0, height}, }; struct pl_frame target = { .num_planes = 1, .planes = {{ .texture = fbo, .components = 3, .component_mapping = {0, 1, 2}, }}, .crop = {2, 2, fbo->params.w - 2, fbo->params.h - 2}, .repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, .bits.color_depth = 32, }, .color = pl_color_space_srgb, }; REQUIRE(pl_render_image(rr, &image, &target, NULL)); // TODO: embed a reference texture and ensure it matches // Test a bunch of different params #define TEST(SNAME, STYPE, DEFAULT, FIELD, LIMIT) \ do { \ for (int i = 0; i <= LIMIT; i++) { \ printf("testing `" #STYPE "." 
#FIELD " = %d`\n", i); \ struct pl_render_params params = pl_render_default_params; \ params.force_dither = true; \ struct STYPE tmp = DEFAULT; \ tmp.FIELD = i; \ params.SNAME = &tmp; \ REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); \ pl_gpu_flush(gpu); \ } \ } while (0) #define TEST_PARAMS(NAME, FIELD, LIMIT) \ TEST(NAME##_params, pl_##NAME##_params, pl_##NAME##_default_params, FIELD, LIMIT) for (int i = 0; i < pl_num_scale_filters; i++) { struct pl_render_params params = pl_render_default_params; params.upscaler = pl_scale_filters[i].filter; printf("testing `params.upscaler = /* %s */`\n", pl_scale_filters[i].name); REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); pl_gpu_flush(gpu); } TEST_PARAMS(deband, iterations, 3); TEST_PARAMS(sigmoid, center, 1); TEST_PARAMS(color_map, intent, PL_INTENT_ABSOLUTE_COLORIMETRIC); TEST_PARAMS(dither, method, PL_DITHER_WHITE_NOISE); TEST_PARAMS(dither, temporal, true); TEST(cone_params, pl_cone_params, pl_vision_deuteranomaly, strength, 0); // Test HDR tone mapping image.color = pl_color_space_hdr10; TEST_PARAMS(color_map, tone_mapping_mode, PL_TONE_MAP_MODE_COUNT - 1); TEST_PARAMS(color_map, gamut_mode, PL_GAMUT_MODE_COUNT - 1); image.color = pl_color_space_srgb; // Test some misc stuff struct pl_render_params params = pl_render_default_params; params.color_adjustment = &(struct pl_color_adjustment) { .brightness = 0.1, .contrast = 0.9, .saturation = 1.5, .gamma = 0.8, .temperature = 0.3, }; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); params = pl_render_default_params; params.force_icc_lut = true; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); params = pl_render_default_params; // Test film grain synthesis image.film_grain.type = PL_FILM_GRAIN_AV1; image.film_grain.params.av1 = av1_grain_data, REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); image.film_grain.type = PL_FILM_GRAIN_H274; image.film_grain.params.h274 = h274_grain_data, REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); image.film_grain = (struct pl_film_grain_data) {0}; // Test mpv-style custom shaders for (int i = 0; i < PL_ARRAY_SIZE(user_shader_tests); i++) { printf("testing user shader:\n\n%s\n", user_shader_tests[i]); const struct pl_hook *hook; hook = pl_mpv_user_shader_parse(gpu, user_shader_tests[i], strlen(user_shader_tests[i])); if (gpu->glsl.compute) { REQUIRE(hook); } else { // Not all shaders compile without compute shader support if (!hook) continue; } params.hooks = &hook; params.num_hooks = 1; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); pl_mpv_user_shader_destroy(&hook); } params = pl_render_default_params; // Test custom LUTs for (int i = 0; i < PL_ARRAY_SIZE(test_luts); i++) { printf("testing custom lut %d\n", i); struct pl_custom_lut *lut; lut = pl_lut_parse_cube(gpu->log, test_luts[i], strlen(test_luts[i])); REQUIRE(lut); bool has_3dlut = gpu->limits.max_tex_3d_dim && gpu->glsl.version > 100; if (lut->size[2] && !has_3dlut) { pl_lut_free(&lut); continue; } // Test all three at the same time to reduce the number of tests image.lut = target.lut = params.lut = lut; for (enum pl_lut_type t = PL_LUT_UNKNOWN; t <= PL_LUT_CONVERSION; t++) { printf("testing LUT method %d\n", t); image.lut_type = target.lut_type = params.lut_type = t; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); } image.lut = target.lut = params.lut = NULL; pl_lut_free(&lut); } // Test overlays image.num_overlays = 1; image.overlays = &(struct pl_overlay) { .tex = img5x5.texture, .mode = PL_OVERLAY_NORMAL, .num_parts = 2, .parts = (struct pl_overlay_part[]) {{ .src = {0, 
0, 2, 2}, .dst = {30, 100, 40, 200}, }, { .src = {2, 2, 5, 5}, .dst = {1000, -1, 3, 5}, }}, }; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); params.disable_fbos = true; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); image.num_overlays = 0; params = pl_render_default_params; target.num_overlays = 1; target.overlays = &(struct pl_overlay) { .tex = img5x5.texture, .mode = PL_OVERLAY_MONOCHROME, .num_parts = 1, .parts = &(struct pl_overlay_part) { .src = {5, 5, 15, 15}, .dst = {5, 5, 15, 15}, .color = {1.0, 0.5, 0.0}, }, }; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_render_image(rr, NULL, &target, ¶ms)); target.num_overlays = 0; // Test rotation for (pl_rotation rot = 0; rot < PL_ROTATION_360; rot += PL_ROTATION_90) { image.rotation = rot; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); } // Attempt frame mixing, using the mixer queue helper printf("testing frame mixing \n"); struct pl_render_params mix_params = { .frame_mixer = &pl_filter_mitchell_clamp, .info_callback = render_info_cb, }; struct pl_queue_params qparams = { .radius = pl_frame_mix_radius(&mix_params), .vsync_duration = 1.0 / 60.0, .frame_duration = 1.0 / 24.0, }; #define NUM_MIX_FRAMES 20 struct pl_source_frame srcframes[NUM_MIX_FRAMES+1]; srcframes[NUM_MIX_FRAMES] = (struct pl_source_frame) {0}; for (int i = 0; i < NUM_MIX_FRAMES; i++) { srcframes[i] = (struct pl_source_frame) { .pts = i * qparams.frame_duration, .map = frame_passthrough, .frame_data = &image, }; } pl_queue queue = pl_queue_create(gpu); enum pl_queue_status ret; // Test pre-pushing all frames, with delayed EOF. for (int i = 0; i < NUM_MIX_FRAMES; i++) { const struct pl_source_frame *src = &srcframes[i]; if (i > 10) // test pushing in reverse order src = &srcframes[NUM_MIX_FRAMES - 1 - i]; if (!pl_queue_push_block(queue, 1, src)) // mini-sleep pl_queue_push(queue, src); // push it anyway, for testing } struct pl_frame_mix mix; while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) { if (ret == PL_QUEUE_MORE) { REQUIRE(qparams.pts > 0.0); pl_queue_push(queue, NULL); // push delayed EOF continue; } REQUIRE(ret == PL_QUEUE_OK); REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); // Simulate advancing vsync qparams.pts += qparams.vsync_duration; } // Test dynamically pulling all frames, with oversample mixer const struct pl_source_frame *frame_ptr = &srcframes[0]; mix_params.frame_mixer = &pl_oversample_frame_mixer; qparams = (struct pl_queue_params) { .radius = pl_frame_mix_radius(&mix_params), .vsync_duration = qparams.vsync_duration, .frame_duration = qparams.frame_duration, .get_frame = get_frame_ptr, .priv = &frame_ptr, }; pl_queue_reset(queue); while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) { REQUIRE(ret == PL_QUEUE_OK); REQUIRE(mix.num_frames <= 2); REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); qparams.pts += qparams.vsync_duration; } // Test large PTS jump pl_queue_reset(queue); REQUIRE(pl_queue_update(queue, &mix, &qparams) == PL_QUEUE_EOF); pl_queue_destroy(&queue); error: pl_renderer_destroy(&rr); pl_tex_destroy(gpu, &img5x5_tex); pl_tex_destroy(gpu, &fbo); } static struct pl_hook_res noop_hook(void *priv, const struct pl_hook_params *params) { return (struct pl_hook_res) {0}; } static void pl_ycbcr_tests(pl_gpu gpu) { pl_renderer rr = pl_renderer_create(gpu->log, gpu); if (!rr) return; struct pl_plane_data data[3]; for (int i = 0; i < 3; i++) { const int sub = i > 0 ? 
1 : 0; const int width = (323 + sub) >> sub; const int height = (255 + sub) >> sub; data[i] = (struct pl_plane_data) { .type = PL_FMT_UNORM, .width = width, .height = height, .component_size = {16}, .component_map = {i}, .pixel_stride = sizeof(uint16_t), .row_stride = PL_ALIGN2(width * sizeof(uint16_t), gpu->limits.align_tex_xfer_pitch), }; } pl_fmt fmt = pl_plane_find_fmt(gpu, NULL, &data[0]); enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_HOST_READABLE; if (!fmt || (fmt->caps & caps) != caps) return; pl_tex src_tex[3] = {0}; pl_tex dst_tex[3] = {0}; struct pl_frame img = { .num_planes = 3, .repr = pl_color_repr_hdtv, .color = pl_color_space_bt709, }; struct pl_frame target = { .num_planes = 3, .repr = pl_color_repr_hdtv, .color = pl_color_space_bt709, }; uint8_t *src_buffer[3] = {0}; uint8_t *dst_buffer = NULL; for (int i = 0; i < 3; i++) { // Generate some arbitrary data for the buffer src_buffer[i] = malloc(data[i].height * data[i].row_stride); if (!src_buffer[i]) goto error; data[i].pixels = src_buffer[i]; for (int y = 0; y < data[i].height; y++) { for (int x = 0; x < data[i].width; x++) { size_t off = y * data[i].row_stride + x * data[i].pixel_stride; uint16_t *pixel = (uint16_t *) &src_buffer[i][off]; int gx = 200 + 100 * i, gy = 300 + 150 * i; *pixel = (gx * x) ^ (gy * y); // whatever } } REQUIRE(pl_upload_plane(gpu, &img.planes[i], &src_tex[i], &data[i])); } // This co-sites chroma pixels with pixels in the RGB image, meaning we // get an exact round-trip when sampling both ways. This makes it useful // as a test case, even though it's not common in the real world. pl_frame_set_chroma_location(&img, PL_CHROMA_TOP_LEFT); for (int i = 0; i < 3; i++) { dst_tex[i] = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fmt, .w = data[i].width, .h = data[i].height, .renderable = true, .host_readable = true, .storable = fmt->caps & PL_FMT_CAP_STORABLE, .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE, }); if (!dst_tex[i]) goto error; target.planes[i] = img.planes[i]; target.planes[i].texture = dst_tex[i]; } REQUIRE(pl_render_image(rr, &img, &target, &(struct pl_render_params) { .num_hooks = 1, .hooks = &(const struct pl_hook *){&(struct pl_hook) { // Forces chroma merging, to test the chroma merging code .stages = PL_HOOK_CHROMA_INPUT, .hook = noop_hook, }}, })); size_t buf_size = data[0].height * data[0].row_stride; dst_buffer = malloc(buf_size); if (!dst_buffer) goto error; for (int i = 0; i < 3; i++) { memset(dst_buffer, 0xAA, buf_size); REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { .tex = dst_tex[i], .ptr = dst_buffer, .row_pitch = data[i].row_stride, })); for (int y = 0; y < data[i].height; y++) { for (int x = 0; x < data[i].width; x++) { size_t off = y * data[i].row_stride + x * data[i].pixel_stride; uint16_t *src_pixel = (uint16_t *) &src_buffer[i][off]; uint16_t *dst_pixel = (uint16_t *) &dst_buffer[off]; int diff = abs((int) *src_pixel - (int) *dst_pixel); REQUIRE(diff <= 50); // a little under 0.1% } } } error: pl_renderer_destroy(&rr); free(dst_buffer); for (int i = 0; i < 3; i++) { free(src_buffer[i]); pl_tex_destroy(gpu, &src_tex[i]); pl_tex_destroy(gpu, &dst_tex[i]); } } static void pl_test_export_import(pl_gpu gpu, enum pl_handle_type handle_type) { // Test texture roundtrip if (!(gpu->export_caps.tex & handle_type) || !(gpu->import_caps.tex & handle_type)) goto skip_tex; pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0, PL_FMT_CAP_BLITTABLE); if (!fmt) goto skip_tex; printf("testing texture import/export\n"); pl_tex export = 
pl_tex_create(gpu, &(struct pl_tex_params) { .w = 32, .h = 32, .format = fmt, .export_handle = handle_type, }); REQUIRE(export); REQUIRE_HANDLE(export->shared_mem, handle_type); pl_tex import = pl_tex_create(gpu, &(struct pl_tex_params) { .w = 32, .h = 32, .format = fmt, .import_handle = handle_type, .shared_mem = export->shared_mem, }); REQUIRE(import); pl_tex_destroy(gpu, &import); pl_tex_destroy(gpu, &export); skip_tex: ; // Test buffer roundtrip if (!(gpu->export_caps.buf & handle_type) || !(gpu->import_caps.buf & handle_type)) return; printf("testing buffer import/export\n"); pl_buf exp_buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = 32, .export_handle = handle_type, }); REQUIRE(exp_buf); REQUIRE_HANDLE(exp_buf->shared_mem, handle_type); pl_buf imp_buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = 32, .import_handle = handle_type, .shared_mem = exp_buf->shared_mem, }); REQUIRE(imp_buf); pl_buf_destroy(gpu, &imp_buf); pl_buf_destroy(gpu, &exp_buf); } static void pl_test_host_ptr(pl_gpu gpu) { if (!(gpu->import_caps.buf & PL_HANDLE_HOST_PTR)) return; #ifdef __unix__ printf("testing host ptr\n"); REQUIRE(gpu->limits.max_mapped_size); const size_t size = 2 << 20; const size_t offset = 2 << 10; const size_t slice = 2 << 16; uint8_t *data = aligned_alloc(0x1000, size); for (int i = 0; i < size; i++) data[i] = (uint8_t) i; pl_buf buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = slice, .import_handle = PL_HANDLE_HOST_PTR, .shared_mem = { .handle.ptr = data, .size = size, .offset = offset, }, .host_mapped = true, }); REQUIRE(buf); REQUIRE(memcmp(data + offset, buf->data, slice) == 0); pl_buf_destroy(gpu, &buf); free(data); #endif // unix } static void gpu_shader_tests(pl_gpu gpu) { pl_buffer_tests(gpu); pl_texture_tests(gpu); pl_shader_tests(gpu); pl_scaler_tests(gpu); pl_render_tests(gpu); pl_ycbcr_tests(gpu); REQUIRE(!pl_gpu_is_failed(gpu)); } static void gpu_interop_tests(pl_gpu gpu) { pl_test_export_import(gpu, PL_HANDLE_DMA_BUF); pl_test_host_ptr(gpu); REQUIRE(!pl_gpu_is_failed(gpu)); } libplacebo-v4.192.1/src/tests/include_tmpl.c000066400000000000000000000000751417677245700207540ustar00rootroot00000000000000#include int main() { return 0; } libplacebo-v4.192.1/src/tests/libav.c000066400000000000000000000221441417677245700173730ustar00rootroot00000000000000#include "tests.h" #include "libplacebo/utils/libav.h" int main() { struct pl_plane_data data[4] = {0}; struct pl_bit_encoding bits; #define TEST(pixfmt, reference) \ do { \ int planes = pl_plane_data_from_pixfmt(data, &bits, pixfmt); \ REQUIRE(planes == sizeof(reference) / sizeof(*reference)); \ REQUIRE(memcmp(data, reference, sizeof(reference)) == 0); \ } while (0) // Planar and semiplanar formats static const struct pl_plane_data yuvp8[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {1}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {2}, .pixel_stride = 1, } }; TEST(AV_PIX_FMT_YUV420P, yuvp8); TEST(AV_PIX_FMT_YUV422P, yuvp8); TEST(AV_PIX_FMT_YUV444P, yuvp8); TEST(AV_PIX_FMT_YUV410P, yuvp8); TEST(AV_PIX_FMT_YUV411P, yuvp8); TEST(AV_PIX_FMT_YUV440P, yuvp8); static const struct pl_plane_data yuvap8[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {1}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {2}, 
.pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {3}, .pixel_stride = 1, } }; TEST(AV_PIX_FMT_YUVA420P, yuvap8); static const struct pl_plane_data yuvp16[] = { { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {0}, .pixel_stride = 2, }, { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {1}, .pixel_stride = 2, }, { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {2}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_YUV420P10LE, yuvp16); TEST(AV_PIX_FMT_YUV420P16LE, yuvp16); static const struct pl_plane_data nv12[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8, 8}, .component_map = {1, 2}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_NV12, nv12); static const struct pl_plane_data nv21[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8, 8}, .component_map = {2, 1}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_NV21, nv21); static const struct pl_plane_data p016[] = { { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {0}, .pixel_stride = 2, }, { .type = PL_FMT_UNORM, .component_size = {16, 16}, .component_map = {1, 2}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_P010LE, p016); TEST(AV_PIX_FMT_P016LE, p016); // Packed formats static const struct pl_plane_data r8[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, } }; TEST(AV_PIX_FMT_GRAY8, r8); static const struct pl_plane_data rg8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8}, .component_map = {0, 1}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_GRAY8A, rg8); static const struct pl_plane_data rgb8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8}, .component_map = {0, 1, 2}, .pixel_stride = 3, } }; TEST(AV_PIX_FMT_RGB24, rgb8); static const struct pl_plane_data bgr8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8}, .component_map = {2, 1, 0}, .pixel_stride = 3, } }; TEST(AV_PIX_FMT_BGR24, bgr8); static const struct pl_plane_data rgbx8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8}, .component_map = {0, 1, 2}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_RGB0, rgbx8); static const struct pl_plane_data xrgb8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8}, .component_map = {0, 1, 2}, .component_pad = {8, 0, 0}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_0RGB, xrgb8); static const struct pl_plane_data rgba8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8, 8}, .component_map = {0, 1, 2, 3}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_RGBA, rgba8); static const struct pl_plane_data argb8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8, 8}, .component_map = {3, 0, 1, 2}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_ARGB, argb8); static const struct pl_plane_data bgra8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8, 8}, .component_map = {2, 1, 0, 3}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_BGRA, bgra8); static const struct pl_plane_data abgr8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8, 8}, .component_map = {3, 2, 1, 0}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_ABGR, abgr8); static const struct pl_plane_data r16[] = { { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {0}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_GRAY16LE, r16); static const struct pl_plane_data rgb16[] = { { .type = PL_FMT_UNORM, .component_size = {16, 16, 16}, .component_map = {0, 1, 2}, .pixel_stride = 6, } 
}; TEST(AV_PIX_FMT_RGB48LE, rgb16); static const struct pl_plane_data rgb565[] = { { .type = PL_FMT_UNORM, .component_size = {5, 6, 5}, .component_map = {2, 1, 0}, // LSB to MSB .pixel_stride = 2, } }; TEST(AV_PIX_FMT_RGB565LE, rgb565); // Test pl_frame <- AVFrame bridge struct pl_frame image; AVFrame *frame = av_frame_alloc(); frame->format = AV_PIX_FMT_RGBA; pl_frame_from_avframe(&image, frame); REQUIRE(image.num_planes == 1); REQUIRE(image.repr.sys == PL_COLOR_SYSTEM_RGB); // Test inverse mapping struct pl_color_space csp = image.color; pl_color_space_infer(&csp); pl_avframe_set_color(frame, csp); pl_avframe_set_repr(frame, image.repr); pl_avframe_set_profile(frame, image.profile); pl_frame_from_avframe(&image, frame); pl_color_space_infer(&image.color); REQUIRE(pl_color_space_equal(&csp, &image.color)); av_frame_free(&frame); // Test enum functions for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { enum AVColorSpace spc = pl_system_to_av(sys); enum pl_color_system sys2 = pl_system_from_av(spc); // Exception to the rule, due to different handling in libav* if (sys != PL_COLOR_SYSTEM_BT_2100_HLG) REQUIRE(!sys2 || sys2 == sys); } for (enum pl_color_levels lev = 0; lev < PL_COLOR_LEVELS_COUNT; lev++) { enum AVColorRange range = pl_levels_to_av(lev); enum pl_color_levels lev2 = pl_levels_from_av(range); REQUIRE(lev2 == lev); } for (enum pl_color_primaries prim = 0; prim < PL_COLOR_PRIM_COUNT; prim++) { enum AVColorPrimaries avpri = pl_primaries_to_av(prim); enum pl_color_primaries prim2 = pl_primaries_from_av(avpri); REQUIRE(!prim2 || prim2 == prim); } for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { enum AVColorTransferCharacteristic avtrc = pl_transfer_to_av(trc); enum pl_color_transfer trc2 = pl_transfer_from_av(avtrc); REQUIRE(!trc2 || trc2 == trc); } for (enum pl_chroma_location loc = 0; loc < PL_CHROMA_COUNT; loc++) { enum AVChromaLocation avloc = pl_chroma_to_av(loc); enum pl_chroma_location loc2 = pl_chroma_from_av(avloc); REQUIRE(loc2 == loc); } } libplacebo-v4.192.1/src/tests/lut.c000066400000000000000000000047611417677245700171070ustar00rootroot00000000000000#include "tests.h" static const char *luts[] = { "TITLE \"1D LUT example\" \n" "LUT_1D_SIZE 11 \n" "# Random comment \n" "0.0 0.0 0.0 \n" "0.1 0.1 0.1 \n" "0.2 0.2 0.2 \n" "0.3 0.3 0.3 \n" "0.4 0.4 0.4 \n" "0.5 0.5 0.5 \n" "0.6 0.6 0.6 \n" "0.7 0.7 0.7 \n" "0.8 0.8 0.8 \n" "0.9 0.9 0.9 \n" "0.10 0.10 0.10 \n", "LUT_3D_SIZE 3 \n" "TITLE \"3D LUT example\" \n" "0.0 0.0 0.0 \n" "0.5 0.0 0.0 \n" "1.0 0.0 0.0 \n" "0.0 0.5 0.0 \n" "0.5 0.5 0.0 \n" "1.0 0.5 0.0 \n" "0.0 1.0 0.0 \n" "0.5 1.0 0.0 \n" "1.0 1.0 0.0 \n" "0.0 0.0 0.5 \n" "0.5 0.0 0.5 \n" "1.0 0.0 0.5 \n" "0.0 0.5 0.5 \n" "0.5 0.5 0.5 \n" "1.0 0.5 0.5 \n" "0.0 1.0 0.5 \n" "0.5 1.0 0.5 \n" "1.0 1.0 0.5 \n" "0.0 0.0 1.0 \n" "0.5 0.0 1.0 \n" "1.0 0.0 1.0 \n" "0.0 0.5 1.0 \n" "0.5 0.5 1.0 \n" "1.0 0.5 1.0 \n" "0.0 1.0 1.0 \n" "0.5 1.0 1.0 \n" "1.0 1.0 1.0 \n", "LUT_1D_SIZE 3 \n" "TITLE \"custom domain\" \n" "DOMAIN_MAX 255 255 255 \n" "0 0 0 \n" "128 128 128 \n" "255 255 255 \n" }; int main() { pl_log log = pl_test_logger(); pl_gpu gpu = pl_gpu_dummy_create(log, NULL); pl_shader sh = pl_shader_alloc(log, NULL); pl_shader_obj obj = NULL; for (int i = 0; i < PL_ARRAY_SIZE(luts); i++) { struct pl_custom_lut *lut; lut = pl_lut_parse_cube(log, luts[i], strlen(luts[i])); REQUIRE(lut); pl_shader_reset(sh, pl_shader_params( .gpu = gpu )); pl_shader_custom_lut(sh, lut, &obj); const struct pl_shader_res *res = pl_shader_finalize(sh); 
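        // Added comment: each of the three LUTs above (1D, 3D, and the custom
        // DOMAIN_MAX variant) should yield a valid shader; a NULL result from
        // pl_shader_finalize() is treated as a test failure by the REQUIRE
        // below.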
REQUIRE(res); printf("Generated LUT shader:\n%s\n", res->glsl); pl_lut_free(&lut); } pl_shader_obj_destroy(&obj); pl_shader_free(&sh); pl_log_destroy(&log); } libplacebo-v4.192.1/src/tests/opengl_surfaceless.c000066400000000000000000000214111417677245700221550ustar00rootroot00000000000000#include "gpu_tests.h" #ifndef EPOXY_HAS_EGL int main() { return SKIP; } #else // EPOXY_HAS_EGL #include "opengl/utils.h" #include #include static void opengl_interop_tests(pl_gpu gpu) { pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0, PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_LINEAR); if (!fmt) return; pl_tex export = pl_tex_create(gpu, pl_tex_params( .w = 32, .h = 32, .format = fmt, .sampleable = true, .renderable = true, .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE, )); REQUIRE(export); struct pl_opengl_wrap_params wrap = { .width = export->params.w, .height = export->params.h, .depth = export->params.d, }; wrap.texture = pl_opengl_unwrap(gpu, export, &wrap.target, &wrap.iformat, NULL); REQUIRE(wrap.texture); pl_tex import = pl_opengl_wrap(gpu, &wrap); REQUIRE(import); REQUIRE(import->params.renderable); REQUIRE(import->params.blit_dst == export->params.blit_dst); pl_tex_destroy(gpu, &import); pl_tex_destroy(gpu, &export); } #define PBUFFER_WIDTH 640 #define PBUFFER_HEIGHT 480 struct swapchain_priv { EGLDisplay display; EGLSurface surface; }; static void swap_buffers(void *priv) { struct swapchain_priv *p = priv; eglSwapBuffers(p->display, p->surface); } static void opengl_swapchain_tests(pl_opengl gl, EGLDisplay display, EGLSurface surface) { if (surface == EGL_NO_SURFACE) return; printf("testing opengl swapchain\n"); pl_gpu gpu = gl->gpu; pl_swapchain sw; sw = pl_opengl_create_swapchain(gl, pl_opengl_swapchain_params( .swap_buffers = swap_buffers, .priv = &(struct swapchain_priv) { display, surface }, )); REQUIRE(sw); int w = PBUFFER_WIDTH, h = PBUFFER_HEIGHT; REQUIRE(pl_swapchain_resize(sw, &w, &h)); for (int i = 0; i < 10; i++) { struct pl_swapchain_frame frame; REQUIRE(pl_swapchain_start_frame(sw, &frame)); if (frame.fbo->params.blit_dst) pl_tex_clear(gpu, frame.fbo, (float[4]){0}); // TODO: test this with an actual pl_renderer instance struct pl_frame target; pl_frame_from_swapchain(&target, &frame); REQUIRE(pl_swapchain_submit_frame(sw)); pl_swapchain_swap_buffers(sw); } pl_swapchain_destroy(&sw); } static void opengl_test_export_import(pl_opengl gl, enum pl_handle_type handle_type) { pl_gpu gpu = gl->gpu; printf("testing opengl import/export\n"); if (!(gpu->export_caps.tex & handle_type) || !(gpu->import_caps.tex & handle_type)) { fprintf(stderr, "%s unsupported caps!\n", __func__); return; } pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0, PL_FMT_CAP_BLITTABLE); if (!fmt) { fprintf(stderr, "%s unsupported format\n", __func__); return; } pl_tex export = pl_tex_create(gpu, pl_tex_params( .w = 32, .h = 32, .format = fmt, .export_handle = handle_type, )); REQUIRE(export); REQUIRE_HANDLE(export->shared_mem, handle_type); pl_tex import = pl_tex_create(gpu, pl_tex_params( .w = 32, .h = 32, .format = fmt, .import_handle = handle_type, .shared_mem = export->shared_mem, )); REQUIRE(import); pl_tex_destroy(gpu, &import); pl_tex_destroy(gpu, &export); } int main() { // Create the OpenGL context if (!epoxy_has_egl_extension(EGL_NO_DISPLAY, "EGL_MESA_platform_surfaceless")) return SKIP; EGLDisplay dpy = eglGetPlatformDisplayEXT(EGL_PLATFORM_SURFACELESS_MESA, EGL_DEFAULT_DISPLAY, NULL); if (dpy == EGL_NO_DISPLAY) return SKIP; EGLint major, minor; if (!eglInitialize(dpy, &major, &minor)) return SKIP; 
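    // The EGL_MESA_platform_surfaceless path used above yields an EGLDisplay
    // that is not tied to any window system, which lets these GPU tests run
    // headlessly (e.g. in CI); render targets are later provided through
    // pbuffer surfaces instead of an on-screen swapchain.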
printf("Initialized EGL v%d.%d\n", major, minor); int egl_ver = major * 10 + minor; struct { EGLenum api; EGLenum render; int major, minor; int glsl_ver; EGLenum profile; } egl_vers[] = { { EGL_OPENGL_API, EGL_OPENGL_BIT, 4, 6, 460, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 4, 5, 450, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 4, 4, 440, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 4, 0, 400, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 3, 3, 330, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 3, 2, 150, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 3, 1, 140, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT, }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 3, 0, 130, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT, }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 2, 1, 120, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT, }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 2, 0, 110, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT, }, { EGL_OPENGL_ES_API, EGL_OPENGL_ES3_BIT, 3, 0, 300, }, { EGL_OPENGL_ES_API, EGL_OPENGL_ES2_BIT, 2, 0, 100, }, }; struct pl_glsl_version last_glsl = {0}; struct pl_gpu_limits last_limits = {0}; pl_log log = pl_test_logger(); for (int i = 0; i < PL_ARRAY_SIZE(egl_vers); i++) { const int cfg_attribs[] = { EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, EGL_RENDERABLE_TYPE, egl_vers[i].render, EGL_NONE }; EGLConfig config = 0; EGLint num_configs = 0; bool ok = eglChooseConfig(dpy, cfg_attribs, &config, 1, &num_configs); if (!ok || !num_configs) goto error; if (!eglBindAPI(egl_vers[i].api)) goto error; EGLContext egl; if (egl_vers[i].api == EGL_OPENGL_ES_API) { // OpenGL ES const EGLint egl_attribs[] = { EGL_CONTEXT_CLIENT_VERSION, egl_vers[i].major, (egl_ver >= 15) ? EGL_CONTEXT_OPENGL_DEBUG : EGL_NONE, EGL_TRUE, EGL_NONE }; printf("Attempting creation of OpenGL ES v%d context\n", egl_vers[i].major); egl = eglCreateContext(dpy, config, EGL_NO_CONTEXT, egl_attribs); } else { // Desktop OpenGL const int egl_attribs[] = { EGL_CONTEXT_MAJOR_VERSION, egl_vers[i].major, EGL_CONTEXT_MINOR_VERSION, egl_vers[i].minor, EGL_CONTEXT_OPENGL_PROFILE_MASK, egl_vers[i].profile, (egl_ver >= 15) ? 
EGL_CONTEXT_OPENGL_DEBUG : EGL_NONE, EGL_TRUE, EGL_NONE }; printf("Attempting creation of Desktop OpenGL v%d.%d context\n", egl_vers[i].major, egl_vers[i].minor); egl = eglCreateContext(dpy, config, EGL_NO_CONTEXT, egl_attribs); } if (!egl) goto error; const EGLint pbuffer_attribs[] = { EGL_WIDTH, PBUFFER_WIDTH, EGL_HEIGHT, PBUFFER_HEIGHT, EGL_NONE }; EGLSurface surf = eglCreatePbufferSurface(dpy, config, pbuffer_attribs); if (!eglMakeCurrent(dpy, surf, surf, egl)) goto error; pl_opengl gl = pl_opengl_create(log, pl_opengl_params( .max_glsl_version = egl_vers[i].glsl_ver, .debug = true, .egl_display = dpy, .egl_context = egl, #ifdef CI_ALLOW_SW .allow_software = true, #endif )); if (!gl) goto next; // Skip repeat tests pl_gpu gpu = gl->gpu; if (memcmp(&last_glsl, &gpu->glsl, sizeof(last_glsl)) == 0 && memcmp(&last_limits, &gpu->limits, sizeof(last_limits)) == 0) { printf("Skipping tests due to duplicate capabilities/version\n"); goto next; } last_glsl = gpu->glsl; last_limits = gpu->limits; gpu_shader_tests(gpu); gpu_interop_tests(gpu); opengl_interop_tests(gpu); opengl_swapchain_tests(gl, dpy, surf); opengl_test_export_import(gl, PL_HANDLE_DMA_BUF); // Reduce log spam after first successful test pl_log_level_update(log, PL_LOG_INFO); next: pl_opengl_destroy(&gl); eglDestroySurface(dpy, surf); eglDestroyContext(dpy, egl); continue; error: ; EGLint error = eglGetError(); if (error != EGL_SUCCESS) fprintf(stderr, "EGL error: %s\n", egl_err_str(error)); } eglTerminate(dpy); pl_log_destroy(&log); if (!last_glsl.version) return SKIP; } #endif // EPOXY_HAS_EGL libplacebo-v4.192.1/src/tests/string.c000066400000000000000000000070371417677245700176100ustar00rootroot00000000000000#include "tests.h" static const pl_str null = {0}; static const pl_str test = PL_STR0("test"); static const pl_str empty = PL_STR0(""); static inline bool is_null(pl_str str) { return !str.len && !str.buf; } static inline bool is_empty(pl_str str) { return !str.len; } int main() { void *tmp = pl_tmp(NULL); REQUIRE(is_null(pl_str0(NULL))); REQUIRE(is_null(pl_strdup(tmp, null))); char *empty0 = pl_strdup0(tmp, null); REQUIRE(empty0 && !empty0[0]); REQUIRE(pl_str_equals0(empty, empty0)); pl_str buf = {0}; pl_str_append(tmp, &buf, null); REQUIRE(is_null(buf)); pl_str_append_asprintf(tmp, &buf, "%.*s", PL_STR_FMT(test)); REQUIRE(pl_str_equals(buf, test)); REQUIRE(pl_strchr(null, ' ') < 0); REQUIRE(pl_strspn(null, " ") == 0); REQUIRE(pl_strcspn(null, " ") == 0); REQUIRE(is_null(pl_str_strip(null))); REQUIRE(pl_strchr(test, 's') == 2); REQUIRE(pl_strspn(test, "et") == 2); REQUIRE(pl_strcspn(test, "xs") == 2); REQUIRE(is_null(pl_str_take(null, 10))); REQUIRE(is_empty(pl_str_take(test, 0))); REQUIRE(is_null(pl_str_drop(null, 10))); REQUIRE(is_null(pl_str_drop(test, test.len))); REQUIRE(pl_str_equals(pl_str_drop(test, 0), test)); REQUIRE(pl_str_find(null, test) < 0); REQUIRE(pl_str_find(null, null) == 0); REQUIRE(pl_str_find(test, null) == 0); REQUIRE(pl_str_find(test, test) == 0); pl_str rest; REQUIRE(is_null(pl_str_split_char(null, ' ', &rest)) && is_null(rest)); REQUIRE(is_null(pl_str_split_str(null, test, &rest)) && is_null(rest)); REQUIRE(is_empty(pl_str_split_str(test, test, &rest)) && is_empty(rest)); REQUIRE(is_null(pl_str_getline(null, &rest)) && is_null(rest)); pl_str right, left = pl_str_split_char(pl_str0("left right"), ' ', &right); REQUIRE(pl_str_equals0(left, "left")); REQUIRE(pl_str_equals0(right, "right")); left = pl_str_split_str0(pl_str0("leftTESTright"), "TEST", &right); REQUIRE(pl_str_equals0(left, "left")); 
REQUIRE(pl_str_equals0(right, "right")); pl_str out; REQUIRE(pl_str_decode_hex(tmp, null, &out) && is_empty(out)); REQUIRE(!pl_str_decode_hex(tmp, pl_str0("invalid"), &out)); REQUIRE(pl_str_equals(null, null)); REQUIRE(pl_str_equals(null, empty)); REQUIRE(pl_str_startswith(null, null)); REQUIRE(pl_str_startswith(test, null)); REQUIRE(pl_str_startswith(test, test)); REQUIRE(pl_str_endswith(null, null)); REQUIRE(pl_str_endswith(test, null)); REQUIRE(pl_str_endswith(test, test)); float f; int i; REQUIRE(pl_str_parse_float(pl_str0("1.3984"), &f) && feq(f, 1.3984f, 1e-8)); REQUIRE(pl_str_parse_float(pl_str0("-8.9100083"), &f) && feq(f, -8.9100083f, 1e-8)); REQUIRE(pl_str_parse_float(pl_str0("-0"), &f) && feq(f, 0.0f, 1e-8)); REQUIRE(pl_str_parse_float(pl_str0("-3.14e20"), &f) && feq(f, -3.14e20f, 1e-8)); REQUIRE(pl_str_parse_float(pl_str0("0.5e-5"), &f) && feq(f, 0.5e-5f, 1e-8)); REQUIRE(pl_str_parse_float(pl_str0("0.5e+5"), &f) && feq(f, 0.5e+5f, 1e-8)); REQUIRE(pl_str_parse_int(pl_str0("64239"), &i) && i == 64239); REQUIRE(pl_str_parse_int(pl_str0("-102"), &i) && i == -102); REQUIRE(pl_str_parse_int(pl_str0("+1"), &i) && i == 1); REQUIRE(pl_str_parse_int(pl_str0("-0"), &i) && i == 0); REQUIRE(!pl_str_parse_float(null, &f)); REQUIRE(!pl_str_parse_float(test, &f)); REQUIRE(!pl_str_parse_float(empty, &f)); REQUIRE(!pl_str_parse_int(null, &i)); REQUIRE(!pl_str_parse_int(test, &i)); REQUIRE(!pl_str_parse_int(empty, &i)); pl_free(tmp); return 0; } libplacebo-v4.192.1/src/tests/tests.h000066400000000000000000000152341417677245700174470ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include #include #include #include #include static void pl_log_timestamp(void *stream, enum pl_log_level level, const char *msg) { static char letter[] = { [PL_LOG_FATAL] = 'f', [PL_LOG_ERR] = 'e', [PL_LOG_WARN] = 'w', [PL_LOG_INFO] = 'i', [PL_LOG_DEBUG] = 'd', [PL_LOG_TRACE] = 't', }; float secs = (float) clock() / CLOCKS_PER_SEC; printf("[%2.3f][%c] %s\n", secs, letter[level], msg); if (level <= PL_LOG_WARN) { // duplicate warnings/errors to stderr fprintf(stderr, "[%2.3f][%c] %s\n", secs, letter[level], msg); fflush(stderr); } } static inline pl_log pl_test_logger(void) { setbuf(stdout, NULL); setbuf(stderr, NULL); return pl_log_create(PL_API_VER, pl_log_params( .log_cb = isatty(fileno(stdout)) ? 
pl_log_color : pl_log_timestamp, .log_level = PL_LOG_DEBUG, )); } static inline void require(bool b, const char *msg, const char *file, int line) { if (!b) { fprintf(stderr, "=== FAILED: '%s' at %s:%d\n\n", msg, file, line); exit(1); } } static inline bool feq(float a, float b, float epsilon) { return fabs(a - b) < epsilon * fmax(1.0, fabs(a)); } #define REQUIRE(cond) require((cond), #cond, __FILE__, __LINE__) #define RANDOM (rand() / (float) RAND_MAX) #define SKIP 77 #define REQUIRE_HANDLE(shmem, type) \ switch (type) { \ case PL_HANDLE_FD: \ case PL_HANDLE_DMA_BUF: \ REQUIRE(shmem.handle.fd > -1); \ break; \ case PL_HANDLE_WIN32: \ case PL_HANDLE_WIN32_KMT: \ REQUIRE(shmem.handle.handle); \ /* INVALID_HANDLE_VALUE = (-1) */ \ REQUIRE(shmem.handle.handle != (void *)(intptr_t) (-1)); \ break; \ case PL_HANDLE_HOST_PTR: \ REQUIRE(shmem.handle.ptr); \ break; \ } static const struct pl_av1_grain_data av1_grain_data = { .num_points_y = 6, .points_y = {{0, 4}, {27, 33}, {54, 55}, {67, 61}, {108, 71}, {255, 72}}, .chroma_scaling_from_luma = false, .num_points_uv = {2, 2}, .points_uv = {{{0, 64}, {255, 64}}, {{0, 64}, {255, 64}}}, .scaling_shift = 11, .ar_coeff_lag = 3, .ar_coeffs_y = {4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25, 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66}, .ar_coeffs_uv = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127}, }, .ar_coeff_shift = 7, .grain_scale_shift = 0, .uv_mult = {0, 0}, .uv_mult_luma = {64, 64}, .uv_offset = {0, 0}, }; static const uint8_t h274_lower_bound = 10; static const uint8_t h274_upper_bound = 250; static const int16_t h274_values[6] = {16, 12, 14}; static const struct pl_h274_grain_data h274_grain_data = { .model_id = 0, .blending_mode_id = 0, .log2_scale_factor = 2, .component_model_present = {true}, .num_intensity_intervals = {1}, .num_model_values = {3}, .intensity_interval_lower_bound = {&h274_lower_bound}, .intensity_interval_upper_bound = {&h274_upper_bound}, .comp_model_value = {&h274_values}, }; static const struct pl_dovi_metadata dovi_meta = { .nonlinear = {{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}}, .linear = {{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}}, .comp = { { .num_pivots = 9, .pivots = {0.0615835786, 0.129032254, 0.353861183, 0.604105592, 0.854349971, 0.890518069, 0.906158328, 0.913978517, 0.92082113}, .method = {0, 0, 0, 0, 0, 0, 0, 0}, .poly_coeffs = { {-0.0488376617, 1.99335372, -2.41716385}, {-0.0141925812, 1.61829138, -1.53397191}, { 0.157061458, 0.63640213, -0.11302495}, {0.25272119, 0.246226311, 0.27281332}, {0.951621532, -1.35507894, 1.18898678}, {6.41251612, -13.6188488, 8.07336903}, {13.467535, -29.1869125, 16.6612244}, {28.2321472, -61.8516273, 34.7264938} }, }, { .num_pivots = 2, .pivots = {0.0, 1.0}, .method = {1}, .mmr_order = {3}, .mmr_constant = {-0.500733018}, .mmr_coeffs = {{ {1.08411026, 3.80807829, 0.0881733894, -3.23097038, -0.409078479, -1.31310081, 2.71297002}, {-0.241833091, -3.57880807, -0.108109117, 3.13198471, 0.869203091, 1.96561158, -9.30871677}, {-0.177356839, 1.48970401, 0.0908923149, -0.510447979, -0.687603354, -0.934977889, 12.3544884}, }}, }, { .num_pivots = 2, .pivots = {0.0, 1.0}, .method = {1}, .mmr_order = {3}, .mmr_constant = {-1.23833287}, .mmr_coeffs = {{ {3.52909589, 0.383154511, 5.50820637, -1.02094889, -6.36386824, 0.194121242, 0.64683497}, {-2.57899785, -0.626081586, -6.05729723, 2.29143763, 9.14653015, -0.0507702827, -4.17724133}, {0.705404401, 0.341412306, 2.98387456, -1.71712542, -4.91501331, 
0.1465137, 6.38665438}, }}, }, }, }; libplacebo-v4.192.1/src/tests/tone_mapping.c000066400000000000000000000056131417677245700207600ustar00rootroot00000000000000#include "tests.h" #include "log.h" //#define PRINT_LUTS int main() { pl_log log = pl_test_logger(); // PQ unit tests REQUIRE(feq(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, 0.0), 0.0, 1e-2)); REQUIRE(feq(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, 1.0), 10000.0, 1e-2)); REQUIRE(feq(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, 0.58), 203.0, 1e-2)); // Test round-trip for (float x = 0.0f; x < 1.0f; x += 0.01f) { REQUIRE(feq(x, pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, x)), 1e-5)); } static float lut[128]; struct pl_tone_map_params params = { .input_scaling = PL_HDR_PQ, .output_scaling = PL_HDR_PQ, .lut_size = PL_ARRAY_SIZE(lut), }; // Test regular tone-mapping params.input_min = pl_hdr_rescale(PL_HDR_NITS, params.input_scaling, 0.005); params.input_max = pl_hdr_rescale(PL_HDR_NITS, params.input_scaling, 1000.0); params.output_min = pl_hdr_rescale(PL_HDR_NORM, params.output_scaling, 0.001); params.output_max = pl_hdr_rescale(PL_HDR_NORM, params.output_scaling, 1.0); struct pl_tone_map_params params_inv = params; PL_SWAP(params_inv.input_min, params_inv.output_min); PL_SWAP(params_inv.input_max, params_inv.output_max); // Generate example tone mapping curves, forward and inverse for (int i = 0; i < pl_num_tone_map_functions; i++) { const struct pl_tone_map_function *fun = pl_tone_map_functions[i]; if (fun == &pl_tone_map_auto) continue; printf("Testing tone-mapping function %s\n", fun->name); params.function = params_inv.function = fun; clock_t start = clock(); pl_tone_map_generate(lut, ¶ms); pl_log_cpu_time(log, start, clock(), "generating LUT"); for (int j = 0; j < PL_ARRAY_SIZE(lut); j++) { REQUIRE(isfinite(lut[j]) && !isnan(lut[j])); #ifdef PRINT_LUTS printf("%f, %f\n", j / (PL_ARRAY_SIZE(lut) - 1.0f), lut[j]); #endif } if (fun->map_inverse) { start = clock(); pl_tone_map_generate(lut, ¶ms_inv); pl_log_cpu_time(log, start, clock(), "generating inverse LUT"); for (int j = 0; j < PL_ARRAY_SIZE(lut); j++) { REQUIRE(isfinite(lut[j]) && !isnan(lut[j])); #ifdef PRINT_LUTS printf("%f, %f\n", j / (PL_ARRAY_SIZE(lut) - 1.0f), lut[j]); #endif } } } // Test that `auto` is a no-op for 1:1 tone mapping params.output_min = params.input_min; params.output_max = params.input_max; params.function = &pl_tone_map_auto; pl_tone_map_generate(lut, ¶ms); for (int j = 0; j < PL_ARRAY_SIZE(lut); j++) { float x = j / (PL_ARRAY_SIZE(lut) - 1.0f); x = PL_MIX(params.input_min, params.input_max, x); REQUIRE(feq(x, lut[j], 1e-5)); } pl_log_destroy(&log); } libplacebo-v4.192.1/src/tests/utils.c000066400000000000000000000124201417677245700174320ustar00rootroot00000000000000#include "tests.h" #include "gpu.h" int main() { struct pl_bit_encoding bits = {0}; struct pl_plane_data data = {0}; static const struct pl_bit_encoding bits0 = {0}; static const struct pl_bit_encoding bits8 = { .sample_depth = 8, .color_depth = 8, }; static const struct pl_bit_encoding bits16 = { .sample_depth = 16, .color_depth = 16, }; static const struct pl_bit_encoding bits10_16 = { .sample_depth = 16, .color_depth = 10, }; static const struct pl_bit_encoding bits10_16_6 = { .sample_depth = 16, .color_depth = 10, .bit_shift = 6, }; #define TEST_ALIGN(ref, ref_align, ref_bits, ...) 
\ do { \ pl_plane_data_from_mask(&data, (uint64_t[4]){ __VA_ARGS__ }); \ REQUIRE(memcmp(&data, &ref, sizeof(ref)) == 0); \ pl_plane_data_align(&data, &bits); \ REQUIRE(memcmp(&data, &ref_align, sizeof(ref_align)) == 0); \ REQUIRE(memcmp(&bits, &ref_bits, sizeof(bits)) == 0); \ } while (0) #define TEST(ref, bits, ...) TEST_ALIGN(ref, ref, bits, __VA_ARGS__) static const struct pl_plane_data rgb8 = { .component_size = {8, 8, 8}, .component_map = {0, 1, 2}, }; TEST(rgb8, bits8, 0xFF, 0xFF00, 0xFF0000); static const struct pl_plane_data bgra8 = { .component_size = {8, 8, 8, 8}, .component_map = {2, 1, 0, 3}, }; TEST(bgra8, bits8, 0xFF0000, 0xFF00, 0xFF, 0xFF000000); static const struct pl_plane_data gr16 = { .component_size = {16, 16}, .component_map = {1, 0}, }; TEST(gr16, bits16, 0xFFFF0000, 0xFFFF); static const struct pl_plane_data r10x6g10 = { .component_size = {10, 10}, .component_map = {1, 0}, // LSB -> MSB ordering .component_pad = {0, 6}, }; TEST_ALIGN(r10x6g10, gr16, bits10_16, 0x03FF0000, 0x03FF); static const struct pl_plane_data rgb565 = { .component_size = {5, 6, 5}, .component_map = {2, 1, 0}, // LSB -> MSB ordering }; TEST(rgb565, bits0, 0xF800, 0x07E0, 0x001F); static const struct pl_plane_data rgba16 = { .component_size = {16, 16, 16, 16}, .component_map = {0, 1, 2, 3}, }; TEST(rgba16, bits16, 0xFFFFllu, 0xFFFF0000llu, 0xFFFF00000000llu, 0xFFFF000000000000llu); static const struct pl_plane_data p010 = { .component_size = {10, 10, 10}, .component_map = {0, 1, 2}, .component_pad = {6, 6, 6}, }; static const struct pl_plane_data rgb16 = { .component_size = {16, 16, 16}, .component_map = {0, 1, 2}, }; TEST_ALIGN(p010, rgb16, bits10_16_6, 0xFFC0llu, 0xFFC00000llu, 0xFFC000000000llu); // Test GLSL structure packing struct pl_var vec1 = pl_var_float(""), vec2 = pl_var_vec2(""), vec3 = pl_var_vec3(""), mat2 = pl_var_mat2(""), mat3 = pl_var_mat3(""); struct pl_var_layout layout; layout = pl_std140_layout(0, &vec2); REQUIRE(layout.offset == 0); REQUIRE(layout.stride == 2 * sizeof(float)); REQUIRE(layout.size == 2 * sizeof(float)); layout = pl_std140_layout(3 * sizeof(float), &vec3); REQUIRE(layout.offset == 4 * sizeof(float)); REQUIRE(layout.stride == 3 * sizeof(float)); REQUIRE(layout.size == 3 * sizeof(float)); layout = pl_std140_layout(2 * sizeof(float), &mat3); REQUIRE(layout.offset == 4 * sizeof(float)); REQUIRE(layout.stride == 4 * sizeof(float)); REQUIRE(layout.size == 3 * 4 * sizeof(float)); layout = pl_std430_layout(2 * sizeof(float), &mat3); REQUIRE(layout.offset == 4 * sizeof(float)); REQUIRE(layout.stride == 4 * sizeof(float)); REQUIRE(layout.size == 4 * 3 * sizeof(float)); layout = pl_std140_layout(3 * sizeof(float), &vec1); REQUIRE(layout.offset == 3 * sizeof(float)); REQUIRE(layout.stride == sizeof(float)); REQUIRE(layout.size == sizeof(float)); struct pl_var vec2a = vec2; vec2a.dim_a = 50; layout = pl_std140_layout(sizeof(float), &vec2a); REQUIRE(layout.offset == 4 * sizeof(float)); REQUIRE(layout.stride == 4 * sizeof(float)); REQUIRE(layout.size == 50 * 4 * sizeof(float)); layout = pl_std430_layout(sizeof(float), &vec2a); REQUIRE(layout.offset == 2 * sizeof(float)); REQUIRE(layout.stride == 2 * sizeof(float)); REQUIRE(layout.size == 50 * 2 * sizeof(float)); struct pl_var mat2a = mat2; mat2a.dim_a = 20; layout = pl_std140_layout(5 * sizeof(float), &mat2a); REQUIRE(layout.offset == 8 * sizeof(float)); REQUIRE(layout.stride == 4 * sizeof(float)); REQUIRE(layout.size == 20 * 2 * 4 * sizeof(float)); layout = pl_std430_layout(5 * sizeof(float), &mat2a); REQUIRE(layout.offset 
== 6 * sizeof(float)); REQUIRE(layout.stride == 2 * sizeof(float)); REQUIRE(layout.size == 20 * 2 * 2 * sizeof(float)); for (const struct pl_named_var *nvar = pl_var_glsl_types; nvar->glsl_name; nvar++) { struct pl_var var = nvar->var; REQUIRE(nvar->glsl_name == pl_var_glsl_type_name(var)); var.dim_a = 100; REQUIRE(nvar->glsl_name == pl_var_glsl_type_name(var)); } } libplacebo-v4.192.1/src/tests/vulkan.c000066400000000000000000000175561417677245700176110ustar00rootroot00000000000000#include "gpu_tests.h" #include "vulkan/command.h" #include "vulkan/gpu.h" #include static void vulkan_interop_tests(pl_vulkan pl_vk, enum pl_handle_type handle_type) { pl_gpu gpu = pl_vk->gpu; printf("testing vulkan interop for handle type 0x%x\n", handle_type); if (gpu->export_caps.buf & handle_type) { pl_buf buf = pl_buf_create(gpu, pl_buf_params( .size = 1024, .export_handle = handle_type, )); REQUIRE(buf); REQUIRE_HANDLE(buf->shared_mem, handle_type); REQUIRE(buf->shared_mem.size >= buf->params.size); REQUIRE(pl_buf_export(gpu, buf)); pl_buf_destroy(gpu, &buf); } pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0, PL_FMT_CAP_BLITTABLE); if (!fmt) return; if (gpu->export_caps.sync & handle_type) { pl_sync sync = pl_sync_create(gpu, handle_type); pl_tex tex = pl_tex_create(gpu, pl_tex_params( .w = 32, .h = 32, .format = fmt, .blit_dst = true, )); REQUIRE(sync); REQUIRE(tex); // Note: For testing purposes, we have to fool pl_tex_export into // thinking this texture is actually exportable. Just hack it in // horribly. ((struct pl_tex_params *) &tex->params)->export_handle = PL_HANDLE_DMA_BUF; REQUIRE(pl_tex_export(gpu, tex, sync)); // Re-use our internal helpers to signal this VkSemaphore struct vk_ctx *vk = PL_PRIV(pl_vk); struct vk_cmd *cmd = vk_cmd_begin(vk, vk->pool_graphics); REQUIRE(cmd); struct pl_sync_vk *sync_vk = PL_PRIV(sync); vk_cmd_sig(cmd, (pl_vulkan_sem){ sync_vk->signal }); REQUIRE(vk_cmd_submit(vk, &cmd)); // Do something with the image again to "import" it pl_tex_clear(gpu, tex, (float[4]){0}); pl_gpu_finish(gpu); REQUIRE(!pl_tex_poll(gpu, tex, 0)); pl_sync_destroy(gpu, &sync); pl_tex_destroy(gpu, &tex); } } static void vulkan_swapchain_tests(pl_vulkan vk, VkSurfaceKHR surf) { if (!surf) return; printf("testing vulkan swapchain\n"); pl_gpu gpu = vk->gpu; pl_swapchain sw; sw = pl_vulkan_create_swapchain(vk, pl_vulkan_swapchain_params( .surface = surf, )); REQUIRE(sw); // Attempt actually initializing the swapchain int w = 640, h = 480; REQUIRE(pl_swapchain_resize(sw, &w, &h)); for (int i = 0; i < 10; i++) { struct pl_swapchain_frame frame; REQUIRE(pl_swapchain_start_frame(sw, &frame)); if (frame.fbo->params.blit_dst) pl_tex_clear(gpu, frame.fbo, (float[4]){0}); // TODO: test this with an actual pl_renderer instance struct pl_frame target; pl_frame_from_swapchain(&target, &frame); REQUIRE(pl_swapchain_submit_frame(sw)); pl_swapchain_swap_buffers(sw); // Try resizing the swapchain in the middle of rendering if (i == 5) { w = 320; h = 240; REQUIRE(pl_swapchain_resize(sw, &w, &h)); } } pl_swapchain_destroy(&sw); } int main() { pl_log log = pl_test_logger(); pl_vk_inst inst = pl_vk_inst_create(log, pl_vk_inst_params( .debug = true, .debug_extra = true, .get_proc_addr = vkGetInstanceProcAddr, .opt_extensions = (const char *[]){ VK_KHR_SURFACE_EXTENSION_NAME, "VK_EXT_headless_surface", // in case it isn't defined }, .num_opt_extensions = 2, )); if (!inst) return SKIP; PL_VK_LOAD_FUN(inst->instance, EnumeratePhysicalDevices, inst->get_proc_addr); PL_VK_LOAD_FUN(inst->instance, 
GetPhysicalDeviceProperties, inst->get_proc_addr); uint32_t num = 0; EnumeratePhysicalDevices(inst->instance, &num, NULL); if (!num) return SKIP; VkPhysicalDevice *devices = calloc(num, sizeof(*devices)); if (!devices) return 1; EnumeratePhysicalDevices(inst->instance, &num, devices); VkSurfaceKHR surf = NULL; #ifdef VK_EXT_headless_surface PL_VK_LOAD_FUN(inst->instance, CreateHeadlessSurfaceEXT, inst->get_proc_addr); if (CreateHeadlessSurfaceEXT) { VkHeadlessSurfaceCreateInfoEXT info = { .sType = VK_STRUCTURE_TYPE_HEADLESS_SURFACE_CREATE_INFO_EXT, }; VkResult res = CreateHeadlessSurfaceEXT(inst->instance, &info, NULL, &surf); REQUIRE(res == VK_SUCCESS); } #endif // VK_EXT_headless_surface // Make sure choosing any device works VkPhysicalDevice dev; dev = pl_vulkan_choose_device(log, pl_vulkan_device_params( .instance = inst->instance, .get_proc_addr = inst->get_proc_addr, .allow_software = true, .surface = surf, )); if (!dev) return SKIP; // Test all attached devices for (int i = 0; i < num; i++) { VkPhysicalDeviceProperties props = {0}; GetPhysicalDeviceProperties(devices[i], &props); #ifndef CI_ALLOW_SW if (props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) { printf("Skipping device %d: %s\n", i, props.deviceName); continue; } #endif printf("Testing device %d: %s\n", i, props.deviceName); // Make sure we can choose this device by name dev = pl_vulkan_choose_device(log, pl_vulkan_device_params( .instance = inst->instance, .get_proc_addr = inst->get_proc_addr, .device_name = props.deviceName, )); REQUIRE(dev == devices[i]); struct pl_vulkan_params params = *pl_vulkan_params( .instance = inst->instance, .get_proc_addr = inst->get_proc_addr, .device = devices[i], .queue_count = 8, // test inter-queue stuff .surface = surf, ); pl_vulkan vk = pl_vulkan_create(log, ¶ms); if (!vk) continue; gpu_shader_tests(vk->gpu); vulkan_swapchain_tests(vk, surf); // Print heap statistics pl_vk_print_heap(vk->gpu, PL_LOG_DEBUG); // Test importing this context via the vulkan interop API pl_vulkan vk2 = pl_vulkan_import(log, pl_vulkan_import_params( .instance = vk->instance, .get_proc_addr = inst->get_proc_addr, .phys_device = vk->phys_device, .device = vk->device, .extensions = vk->extensions, .num_extensions = vk->num_extensions, .features = vk->features, .queue_graphics = vk->queue_graphics, .queue_compute = vk->queue_compute, .queue_transfer = vk->queue_transfer, )); REQUIRE(vk2); pl_vulkan_destroy(&vk2); // Run these tests last because they disable some validation layers #ifdef PL_HAVE_UNIX vulkan_interop_tests(vk, PL_HANDLE_FD); vulkan_interop_tests(vk, PL_HANDLE_DMA_BUF); #endif #ifdef PL_HAVE_WIN32 vulkan_interop_tests(vk, PL_HANDLE_WIN32); vulkan_interop_tests(vk, PL_HANDLE_WIN32_KMT); #endif gpu_interop_tests(vk->gpu); pl_vulkan_destroy(&vk); // Re-run the same export/import tests with async queues disabled params.async_compute = false; params.async_transfer = false; vk = pl_vulkan_create(log, ¶ms); REQUIRE(vk); // it succeeded the first time #ifdef PL_HAVE_UNIX vulkan_interop_tests(vk, PL_HANDLE_FD); vulkan_interop_tests(vk, PL_HANDLE_DMA_BUF); #endif #ifdef PL_HAVE_WIN32 vulkan_interop_tests(vk, PL_HANDLE_WIN32); vulkan_interop_tests(vk, PL_HANDLE_WIN32_KMT); #endif gpu_interop_tests(vk->gpu); pl_vulkan_destroy(&vk); // Reduce log spam after first tested device pl_log_level_update(log, PL_LOG_INFO); } vkDestroySurfaceKHR(inst->instance, surf, NULL); pl_vk_inst_destroy(&inst); pl_log_destroy(&log); free(devices); } 
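/*
 * Illustrative sketch (not part of the archive itself): the minimal bring-up
 * path for a libplacebo Vulkan context, using only calls already exercised
 * by the test above. Error handling and cleanup are abbreviated; the helper
 * name is made up for this example.
 */
static pl_vulkan example_create_vulkan(pl_log log)
{
    // Create a VkInstance with default settings
    pl_vk_inst inst = pl_vk_inst_create(log, pl_vk_inst_params(
        .get_proc_addr = vkGetInstanceProcAddr,
    ));
    if (!inst)
        return NULL;

    // Let libplacebo pick a suitable physical device
    VkPhysicalDevice dev = pl_vulkan_choose_device(log, pl_vulkan_device_params(
        .instance      = inst->instance,
        .get_proc_addr = inst->get_proc_addr,
    ));

    // Create the logical device plus the pl_gpu abstraction on top of it
    pl_vulkan vk = pl_vulkan_create(log, pl_vulkan_params(
        .instance      = inst->instance,
        .get_proc_addr = inst->get_proc_addr,
        .device        = dev,
    ));

    // NOTE: `inst` must outlive `vk`; a real caller would keep it around and
    // call pl_vk_inst_destroy(&inst) only after pl_vulkan_destroy(&vk).
    return vk;
}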
libplacebo-v4.192.1/src/tone_mapping.c000066400000000000000000000406571417677245700176250ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "common.h" static const float PQ_M1 = 2610./4096 * 1./4, PQ_M2 = 2523./4096 * 128, PQ_C1 = 3424./4096, PQ_C2 = 2413./4096 * 32, PQ_C3 = 2392./4096 * 32; float pl_hdr_rescale(enum pl_hdr_scaling from, enum pl_hdr_scaling to, float x) { if (from == to) return x; if (!x) // micro-optimization for common value return x; // Convert input to PL_SCALE_RELATIVE switch (from) { case PL_HDR_PQ: x = powf(x, 1.0f / PQ_M2); x = fmaxf(x - PQ_C1, 0.0f) / (PQ_C2 - PQ_C3 * x); x = powf(x, 1.0f / PQ_M1); x *= 10000.0f; // fall through case PL_HDR_NITS: x /= PL_COLOR_SDR_WHITE; // fall through case PL_HDR_NORM: goto output; case PL_HDR_SQRT: x *= x; goto output; case PL_HDR_SCALING_COUNT: break; } pl_unreachable(); output: // Convert PL_SCALE_RELATIVE to output switch (to) { case PL_HDR_NORM: return x; case PL_HDR_SQRT: return sqrtf(x); case PL_HDR_NITS: return x * PL_COLOR_SDR_WHITE; case PL_HDR_PQ: x *= PL_COLOR_SDR_WHITE / 10000.0f; x = powf(x, PQ_M1); x = (PQ_C1 + PQ_C2 * x) / (1.0f + PQ_C3 * x); x = powf(x, PQ_M2); return x; case PL_HDR_SCALING_COUNT: break; } pl_unreachable(); } bool pl_tone_map_params_equal(const struct pl_tone_map_params *a, const struct pl_tone_map_params *b) { return a->function == b->function && a->param == b->param && a->input_scaling == b->input_scaling && a->output_scaling == b->output_scaling && a->lut_size == b->lut_size && a->input_min == b->input_min && a->input_max == b->input_max && a->output_min == b->output_min && a->output_max == b->output_max; } bool pl_tone_map_params_noop(const struct pl_tone_map_params *p) { float in_min = pl_hdr_rescale(p->input_scaling, PL_HDR_NITS, p->input_min); float in_max = pl_hdr_rescale(p->input_scaling, PL_HDR_NITS, p->input_max); float out_min = pl_hdr_rescale(p->output_scaling, PL_HDR_NITS, p->output_min); float out_max = pl_hdr_rescale(p->output_scaling, PL_HDR_NITS, p->output_max); return fabs(in_min - out_min) < 1e-4 && // no BPC in_max < out_max + 1e-2 && // no range reduction (out_max < in_max + 1e-2 || !p->function->map_inverse); } static struct pl_tone_map_params fix_params(const struct pl_tone_map_params *params) { const struct pl_tone_map_function *fun = PL_DEF(params->function, &pl_tone_map_clip); float param = PL_DEF(params->param, fun->param_def); if (fun == &pl_tone_map_auto) { float src_max = pl_hdr_rescale(params->input_scaling, PL_HDR_NORM, params->input_max); float dst_max = pl_hdr_rescale(params->output_scaling, PL_HDR_NORM, params->output_max); float ratio = src_max / dst_max; if (ratio > 10) { // Extreme reduction: Pick spline for its quasi-linear behavior fun = &pl_tone_map_spline; } else if (fmaxf(ratio, 1 / ratio) > 2) { // Reasonably ranged HDR<->SDR conversion, pick BT.2446a since it // was designed for this task fun = 
&pl_tone_map_bt2446a; } else if (ratio < 1) { // Small range inverse tone mapping, pick spline since BT.2446a // distorts colors too much fun = &pl_tone_map_spline; } else { // Small range conversion (nearly no-op), pick BT.2390 because it // has the best asymptotic behavior (approximately linear). fun = &pl_tone_map_bt2390; } param = fun->param_def; } return (struct pl_tone_map_params) { .function = fun, .param = PL_CLAMP(param, fun->param_min, fun->param_max), .lut_size = params->lut_size, .input_scaling = fun->scaling, .output_scaling = fun->scaling, .input_min = pl_hdr_rescale(params->input_scaling, fun->scaling, params->input_min), .input_max = pl_hdr_rescale(params->input_scaling, fun->scaling, params->input_max), .output_min = pl_hdr_rescale(params->output_scaling, fun->scaling, params->output_min), .output_max = pl_hdr_rescale(params->output_scaling, fun->scaling, params->output_max), }; } #define FOREACH_LUT(lut, V) \ for (float *_iter = lut, *_end = lut + params->lut_size, V; \ _iter < _end && ( V = *_iter, 1 ); *_iter++ = V) static void map_lut(float *lut, const struct pl_tone_map_params *params) { if (params->output_max > params->input_max + 1e-4) { // Inverse tone-mapping if (params->function->map_inverse) { params->function->map_inverse(lut, params); } else { // Perform naive (linear-stretched) BPC only FOREACH_LUT(lut, x) { x -= params->input_min; x *= (params->input_max - params->output_min) / (params->input_max - params->input_min); x += params->output_min; } } } else { // Forward tone-mapping params->function->map(lut, params); } } void pl_tone_map_generate(float *out, const struct pl_tone_map_params *params) { struct pl_tone_map_params fixed = fix_params(params); // Generate input values evenly spaced in `params->input_scaling` for (size_t i = 0; i < params->lut_size; i++) { float x = (float) i / (params->lut_size - 1); x = PL_MIX(params->input_min, params->input_max, x); out[i] = pl_hdr_rescale(params->input_scaling, fixed.function->scaling, x); } map_lut(out, &fixed); // Sanitize outputs and adapt back to `params->scaling` for (size_t i = 0; i < params->lut_size; i++) { float x = PL_CLAMP(out[i], fixed.output_min, fixed.output_max); out[i] = pl_hdr_rescale(fixed.function->scaling, params->output_scaling, x); } } float pl_tone_map_sample(float x, const struct pl_tone_map_params *params) { struct pl_tone_map_params fixed = fix_params(params); fixed.lut_size = 1; x = PL_CLAMP(x, params->input_min, params->input_max); x = pl_hdr_rescale(params->input_scaling, fixed.function->scaling, x); map_lut(&x, &fixed); x = PL_CLAMP(x, fixed.output_min, fixed.output_max); x = pl_hdr_rescale(fixed.function->scaling, params->output_scaling, x); return x; } // Rescale from input-absolute to input-relative static inline float rescale_in(float x, const struct pl_tone_map_params *params) { return (x - params->input_min) / (params->input_max - params->input_min); } // Rescale from input-absolute to output-relative static inline float rescale(float x, const struct pl_tone_map_params *params) { return (x - params->input_min) / (params->output_max - params->output_min); } // Rescale from output-relative to output-absolute static inline float rescale_out(float x, const struct pl_tone_map_params *params) { return x * (params->output_max - params->output_min) + params->output_min; } static inline float bt1886_eotf(float x, float min, float max) { const float lb = powf(min, 1/2.4f); const float lw = powf(max, 1/2.4f); return powf((lw - lb) * x + lb, 2.4f); } static inline float bt1886_oetf(float x, 
float min, float max) { const float lb = powf(min, 1/2.4f); const float lw = powf(max, 1/2.4f); return (powf(x, 1/2.4f) - lb) / (lw - lb); } const struct pl_tone_map_function pl_tone_map_auto = { .name = "auto", .description = "Automatic selection", }; static void noop(float *lut, const struct pl_tone_map_params *params) { return; } const struct pl_tone_map_function pl_tone_map_clip = { .name = "clip", .description = "No tone mapping (clip)", .map = noop, .map_inverse = noop, }; static void bt2390(float *lut, const struct pl_tone_map_params *params) { const float minLum = rescale_in(params->output_min, params); const float maxLum = rescale_in(params->output_max, params); const float offset = params->param; const float ks = (1 + offset) * maxLum - offset; const float bp = minLum > 0 ? fminf(1 / minLum, 4) : 4; const float gain_inv = 1 + minLum / maxLum * powf(1 - maxLum, bp); const float gain = maxLum < 1 ? 1 / gain_inv : 1; FOREACH_LUT(lut, x) { x = rescale_in(x, params); // Piece-wise hermite spline if (ks < 1) { float tb = (x - ks) / (1 - ks); float tb2 = tb * tb; float tb3 = tb2 * tb; float pb = (2 * tb3 - 3 * tb2 + 1) * ks + (tb3 - 2 * tb2 + tb) * (1 - ks) + (-2 * tb3 + 3 * tb2) * maxLum; x = x < ks ? x : pb; } // Black point adaptation if (x < 1) { x += minLum * powf(1 - x, bp); x = gain * (x - minLum) + minLum; } x = x * (params->input_max - params->input_min) + params->input_min; } } const struct pl_tone_map_function pl_tone_map_bt2390 = { .name = "bt2390", .description = "ITU-R BT.2390 EETF", .scaling = PL_HDR_PQ, .param_desc = "Knee offset", .param_min = 0.50, .param_def = 1.00, .param_max = 2.00, .map = bt2390, }; static void bt2446a(float *lut, const struct pl_tone_map_params *params) { const float phdr = 1 + 32 * powf(params->input_max / 10000, 1/2.4f); const float psdr = 1 + 32 * powf(params->output_max / 10000, 1/2.4f); FOREACH_LUT(lut, x) { x = powf(rescale_in(x, params), 1/2.4f); x = logf(1 + (phdr - 1) * x) / logf(phdr); if (x <= 0.7399f) { x = 1.0770f * x; } else if (x < 0.9909f) { x = (-1.1510f * x + 2.7811f) * x - 0.6302f; } else { x = 0.5f * x + 0.5f; } x = (powf(psdr, x) - 1) / (psdr - 1); x = bt1886_eotf(x, params->output_min, params->output_max); } } static void bt2446a_inv(float *lut, const struct pl_tone_map_params *params) { FOREACH_LUT(lut, x) { x = bt1886_oetf(x, params->input_min, params->input_max); x *= 255.0; if (x > 70) { x = powf(x, (2.8305e-6f * x - 7.4622e-4f) * x + 1.2528f); } else { x = powf(x, (1.8712e-5f * x - 2.7334e-3f) * x + 1.3141f); } x = powf(x / 1000, 2.4f); x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_bt2446a = { .name = "bt2446a", .description = "ITU-R BT.2446 Method A", .scaling = PL_HDR_NITS, .map = bt2446a, .map_inverse = bt2446a_inv, }; static void spline(float *lut, const struct pl_tone_map_params *params) { // Normalize everything the pivot to make the math easier const float pivot = params->param; const float in_min = params->input_min - pivot; const float in_max = params->input_max - pivot; const float out_min = params->output_min - pivot; const float out_max = params->output_max - pivot; // Solve P of order 2 for: // P(in_min) = out_min // P'(0.0) = 1.0 // P(0.0) = 0.0 const float Pa = (out_min - in_min) / (in_min * in_min); // Solve Q of order 3 for: // Q(in_min) = out_min // Q''(in_min) = 0.0 // Q(0.0) = 0.0 // Q'(0.0) = 1.0 const float t = 2 * in_max * in_max; const float Qa = (in_max - out_max) / (in_max * t); const float Qb = -3 * (in_max - out_max) / t; FOREACH_LUT(lut, x) { x -= pivot; x = x 
> 0 ? ((Qa * x + Qb) * x + 1) * x : (Pa * x + 1) * x; x += pivot; } } const struct pl_tone_map_function pl_tone_map_spline = { .name = "spline", .description = "Single-pivot polynomial spline", .param_desc = "Pivot point", .param_min = 0.15, // ~1 nits .param_def = 0.30, // ~10 nits .param_max = 0.50, // ~100 nits .scaling = PL_HDR_PQ, .map = spline, .map_inverse = spline, }; static void reinhard(float *lut, const struct pl_tone_map_params *params) { const float peak = rescale(params->input_max, params), contrast = params->param, offset = (1.0 - contrast) / contrast, scale = (peak + offset) / peak; FOREACH_LUT(lut, x) { x = rescale(x, params); x = x / (x + offset); x *= scale; x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_reinhard = { .name = "reinhard", .description = "Reinhard", .param_desc = "Contrast", .param_min = 0.00, .param_def = 0.50, .param_max = 0.99, .map = reinhard, }; static void mobius(float *lut, const struct pl_tone_map_params *params) { const float peak = rescale(params->input_max, params), j = params->param; // Solve for M(j) = j; M(peak) = 1.0; M'(j) = 1.0 // where M(x) = scale * (x+a)/(x+b) const float a = -j*j * (peak - 1.0f) / (j*j - 2.0f * j + peak); const float b = (j*j - 2.0f * j * peak + peak) / fmaxf(1e-6f, peak - 1.0f); const float scale = (b*b + 2.0f * b*j + j*j) / (b - a); FOREACH_LUT(lut, x) { x = rescale(x, params); x = x <= j ? x : scale * (x + a) / (x + b); x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_mobius = { .name = "mobius", .description = "Mobius", .param_desc = "Knee point", .param_min = 0.00, .param_def = 0.30, .param_max = 0.99, .map = mobius, }; static inline float hable(float x) { const float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; return ((x * (A*x + C*B) + D*E) / (x * (A*x + B) + D*F)) - E/F; } static void hable_map(float *lut, const struct pl_tone_map_params *params) { const float peak = params->input_max / params->output_max, scale = 1.0f / hable(peak); FOREACH_LUT(lut, x) { x = bt1886_oetf(x, params->input_min, params->input_max); x = bt1886_eotf(x, 0, peak); x = scale * hable(x); x = bt1886_oetf(x, 0, 1); x = bt1886_eotf(x, params->output_min, params->output_max); } } const struct pl_tone_map_function pl_tone_map_hable = { .name = "hable", .description = "Filmic tone-mapping (Hable)", .map = hable_map, }; static void gamma_map(float *lut, const struct pl_tone_map_params *params) { const float peak = rescale(params->input_max, params), cutoff = params->param, gamma = logf(cutoff) / logf(cutoff / peak); FOREACH_LUT(lut, x) { x = rescale(x, params); x = x > cutoff ? 
powf(x / peak, gamma) : x; x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_gamma = { .name = "gamma", .description = "Gamma function with knee", .param_desc = "Knee point", .param_min = 0.001, .param_def = 0.50, .param_max = 1.00, .map = gamma_map, }; static void linear(float *lut, const struct pl_tone_map_params *params) { const float gain = params->param; FOREACH_LUT(lut, x) { x = rescale_in(x, params); x *= gain; x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_linear = { .name = "linear", .description = "Perceptually linear stretch", .param_desc = "Exposure", .param_min = 0.001, .param_def = 1.00, .param_max = 10.0, .scaling = PL_HDR_PQ, .map = linear, .map_inverse = linear, }; const struct pl_tone_map_function * const pl_tone_map_functions[] = { &pl_tone_map_auto, &pl_tone_map_clip, &pl_tone_map_bt2390, &pl_tone_map_bt2446a, &pl_tone_map_spline, &pl_tone_map_reinhard, &pl_tone_map_mobius, &pl_tone_map_hable, &pl_tone_map_gamma, &pl_tone_map_linear, NULL }; const int pl_num_tone_map_functions = PL_ARRAY_SIZE(pl_tone_map_functions) - 1; libplacebo-v4.192.1/src/utils/000077500000000000000000000000001417677245700161255ustar00rootroot00000000000000libplacebo-v4.192.1/src/utils/frame_queue.c000066400000000000000000000605501417677245700205750ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "common.h" #include "log.h" #include "pl_thread.h" struct cache_entry { pl_tex tex[4]; }; struct entry { struct cache_entry cache; struct pl_source_frame src; struct pl_frame frame; uint64_t signature; bool mapped; bool ok; }; // Hard limits for vsync timing validity #define MIN_FPS 10 #define MAX_FPS 200 // Limits for FPS estimation state #define MAX_SAMPLES 32 #define MIN_SAMPLES 8 // Stickiness to prevent `interpolation_threshold` oscillation #define THRESHOLD_MAX_RATIO 0.3 #define THRESHOLD_FRAMES 5 // Maximum number of not-yet-mapped frames to allow queueing in advance #define PREFETCH_FRAMES 2 struct pool { float samples[MAX_SAMPLES]; float estimate; float sum; int idx; int num; int total; }; struct pl_queue { pl_gpu gpu; pl_log log; // For multi-threading, we use two locks. The `lock_weak` guards the queue // state itself. The `lock_strong` has a bigger scope and should be held // for the duration of any functions that expect the queue state to // remain more or less valid (with the exception of adding new members). // // In particular, `pl_queue_reset` and `pl_queue_update` will take // the strong lock, while `pl_queue_push_*` will only take the weak // lock. 
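    //
    // Illustrative lock-ordering summary (derived from the functions below):
    //
    //   pl_queue_update / pl_queue_reset:    lock_strong, then lock_weak
    //   pl_queue_push / pl_queue_push_block: lock_weak only
    //
    // Every caller that takes both locks acquires `lock_strong` first, so a
    // pusher holding only `lock_weak` cannot deadlock against an updater.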
pl_mutex lock_strong; pl_mutex lock_weak; pl_cond wakeup; // Frame queue and state PL_ARRAY(struct entry *) queue; uint64_t signature; int threshold_frames; bool want_frame; bool eof; // Average vsync/frame fps estimation state struct pool vps, fps; float reported_vps; float reported_fps; float prev_pts; // Storage for temporary arrays PL_ARRAY(uint64_t) tmp_sig; PL_ARRAY(float) tmp_ts; PL_ARRAY(const struct pl_frame *) tmp_frame; // Queue of GPU objects to reuse PL_ARRAY(struct cache_entry) cache; }; pl_queue pl_queue_create(pl_gpu gpu) { pl_queue p = pl_alloc_ptr(NULL, p); *p = (struct pl_queue) { .gpu = gpu, .log = gpu->log, }; pl_mutex_init(&p->lock_strong); pl_mutex_init(&p->lock_weak); PL_CHECK_ERR(pl_cond_init(&p->wakeup)); return p; } static inline void unmap_frame(pl_queue p, struct entry *entry) { if (!entry->mapped && entry->src.discard) { PL_TRACE(p, "Discarding unused frame id %"PRIu64" with PTS %f", entry->signature, entry->src.pts); entry->src.discard(&entry->src); } if (entry->mapped && entry->ok && entry->src.unmap) { PL_TRACE(p, "Unmapping frame id %"PRIu64" with PTS %f", entry->signature, entry->src.pts); entry->src.unmap(p->gpu, &entry->frame, &entry->src); } } void pl_queue_destroy(pl_queue *queue) { pl_queue p = *queue; if (!p) return; for (int n = 0; n < p->queue.num; n++) { struct entry *entry = p->queue.elem[n]; unmap_frame(p, entry); for (int i = 0; i < PL_ARRAY_SIZE(entry->cache.tex); i++) pl_tex_destroy(p->gpu, &entry->cache.tex[i]); } for (int n = 0; n < p->cache.num; n++) { for (int i = 0; i < PL_ARRAY_SIZE(p->cache.elem[n].tex); i++) pl_tex_destroy(p->gpu, &p->cache.elem[n].tex[i]); } pl_cond_destroy(&p->wakeup); pl_mutex_destroy(&p->lock_weak); pl_mutex_destroy(&p->lock_strong); pl_free(p); *queue = NULL; } static inline void cull_entry(pl_queue p, struct entry *entry) { unmap_frame(p, entry); // Recycle non-empty texture cache entries static const struct cache_entry null_cache = {0}; if (memcmp(&entry->cache, &null_cache, sizeof(null_cache)) != 0) { for (int i = 0; i < PL_ARRAY_SIZE(entry->cache.tex); i++) { if (entry->cache.tex[i]) pl_tex_invalidate(p->gpu, entry->cache.tex[i]); } PL_ARRAY_APPEND(p, p->cache, entry->cache); } pl_free(entry); } void pl_queue_reset(pl_queue p) { pl_mutex_lock(&p->lock_strong); pl_mutex_lock(&p->lock_weak); for (int i = 0; i < p->queue.num; i++) cull_entry(p, p->queue.elem[i]); *p = (struct pl_queue) { .gpu = p->gpu, .log = p->log, // Reuse lock objects .lock_strong = p->lock_strong, .lock_weak = p->lock_weak, .wakeup = p->wakeup, // Explicitly preserve allocations .queue.elem = p->queue.elem, .tmp_sig.elem = p->tmp_sig.elem, .tmp_ts.elem = p->tmp_ts.elem, .tmp_frame.elem = p->tmp_frame.elem, // Reuse GPU object cache entirely .cache = p->cache, }; pl_cond_signal(&p->wakeup); pl_mutex_unlock(&p->lock_weak); pl_mutex_unlock(&p->lock_strong); } static inline float delta(float old, float new) { return fabs((new - old) / PL_MIN(new, old)); } static inline void update_estimate(struct pool *pool, float cur) { if (pool->num) { static const float max_delta = 0.3; if (delta(pool->sum / pool->num, cur) > max_delta) { pool->sum = 0.0; pool->num = pool->idx = 0; } } if (pool->num++ == MAX_SAMPLES) { pool->sum -= pool->samples[pool->idx]; pool->num--; } pool->sum += pool->samples[pool->idx] = cur; pool->idx = (pool->idx + 1) % MAX_SAMPLES; pool->total++; if (pool->total < MIN_SAMPLES || pool->num >= MIN_SAMPLES) pool->estimate = pool->sum / pool->num; } static void queue_push(pl_queue p, const struct pl_source_frame *src) { if (p->eof && 
!src) return; // ignore duplicate EOF if (p->eof && src) { PL_INFO(p, "Received frame after EOF signaled... discarding frame!"); if (src->discard) src->discard(src); return; } pl_cond_signal(&p->wakeup); if (!src) { PL_TRACE(p, "Received EOF, draining frame queue..."); p->eof = true; p->want_frame = false; return; } // Update FPS estimates if possible/reasonable if (p->queue.num) { float last_pts = p->queue.elem[p->queue.num - 1]->src.pts; float delta = src->pts - last_pts; if (delta < 0.0) { PL_DEBUG(p, "Backwards source PTS jump %f -> %f", last_pts, src->pts); } else if (p->fps.estimate && delta > 10.0 * p->fps.estimate) { PL_DEBUG(p, "Discontinuous source PTS jump %f -> %f", last_pts, src->pts); } else { update_estimate(&p->fps, delta); } } else if (src->pts != 0) { PL_DEBUG(p, "First frame received with non-zero PTS %f", src->pts); } struct entry *entry = pl_alloc_ptr(NULL, entry); *entry = (struct entry) { .signature = p->signature++, .src = *src, }; PL_ARRAY_POP(p->cache, &entry->cache); PL_TRACE(p, "Added new frame id %"PRIu64" with PTS %f", entry->signature, src->pts); // Insert new entry into the correct spot in the queue, sorted by PTS for (int i = p->queue.num;; i--) { if (i == 0 || p->queue.elem[i - 1]->src.pts <= src->pts) { PL_ARRAY_INSERT_AT(p, p->queue, i, entry); break; } } p->want_frame = false; } void pl_queue_push(pl_queue p, const struct pl_source_frame *frame) { pl_mutex_lock(&p->lock_weak); queue_push(p, frame); pl_mutex_unlock(&p->lock_weak); } static bool queue_has_room(pl_queue p) { if (p->want_frame) return true; // Examine the queue tail for (int i = p->queue.num - 1; i >= 0; i--) { if (p->queue.elem[i]->mapped) return true; if (p->queue.num - i >= PREFETCH_FRAMES) return false; } return true; } bool pl_queue_push_block(pl_queue p, uint64_t timeout, const struct pl_source_frame *frame) { pl_mutex_lock(&p->lock_weak); if (!timeout || !frame || p->eof) goto skip_blocking; while (!queue_has_room(p) && !p->eof) { if (pl_cond_timedwait(&p->wakeup, &p->lock_weak, timeout) == ETIMEDOUT) { pl_mutex_unlock(&p->lock_weak); return false; } } skip_blocking: queue_push(p, frame); pl_mutex_unlock(&p->lock_weak); return true; } static void report_estimates(pl_queue p) { if (p->fps.total >= MIN_SAMPLES && p->vps.total >= MIN_SAMPLES) { if (p->reported_fps && p->reported_vps) { // Only re-report the estimates if they've changed considerably // from the previously reported values static const float report_delta = 0.3; float delta_fps = delta(p->reported_fps, p->fps.estimate); float delta_vps = delta(p->reported_vps, p->vps.estimate); if (delta_fps < report_delta && delta_vps < report_delta) return; } PL_INFO(p, "Estimated source FPS: %.3f, display FPS: %.3f", 1.0 / p->fps.estimate, 1.0 / p->vps.estimate); p->reported_fps = p->fps.estimate; p->reported_vps = p->vps.estimate; } } // note: may add more than one frame, since it releases the lock static enum pl_queue_status get_frame(pl_queue p, const struct pl_queue_params *params) { if (p->eof) return PL_QUEUE_EOF; if (!params->get_frame) { if (!params->timeout) return PL_QUEUE_MORE; p->want_frame = true; pl_cond_signal(&p->wakeup); while (p->want_frame) { if (pl_cond_timedwait(&p->wakeup, &p->lock_weak, params->timeout) == ETIMEDOUT) return PL_QUEUE_MORE; } return p->eof ? 
PL_QUEUE_EOF : PL_QUEUE_OK; } // Don't hold the weak mutex while calling into `get_frame`, to allow // `pl_queue_push` to run concurrently while we're waiting for frames pl_mutex_unlock(&p->lock_weak); struct pl_source_frame src; enum pl_queue_status ret; switch ((ret = params->get_frame(&src, params))) { case PL_QUEUE_OK: pl_queue_push(p, &src); break; case PL_QUEUE_EOF: pl_queue_push(p, NULL); break; case PL_QUEUE_MORE: case PL_QUEUE_ERR: break; } pl_mutex_lock(&p->lock_weak); return ret; } static bool map_frame(pl_queue p, struct entry *entry) { if (!entry->mapped) { PL_TRACE(p, "Mapping frame id %"PRIu64" with PTS %f", entry->signature, entry->src.pts); entry->mapped = true; entry->ok = entry->src.map(p->gpu, entry->cache.tex, &entry->src, &entry->frame); if (!entry->ok) PL_ERR(p, "Failed mapping frame id %"PRIu64" with PTS %f", entry->signature, entry->src.pts); } return entry->ok; } // Advance the queue as needed to make sure idx 0 is the last frame before // `pts`, and idx 1 is the first frame after `pts` (unless this is the last). // // Returns PL_QUEUE_OK only if idx 0 is still legal under ZOH semantics. static enum pl_queue_status advance(pl_queue p, float pts, const struct pl_queue_params *params) { // Cull all frames except the last frame before `pts` int culled = 0; for (int i = 1; i < p->queue.num; i++) { if (p->queue.elem[i]->src.pts <= pts) { cull_entry(p, p->queue.elem[i - 1]); culled++; } } PL_ARRAY_REMOVE_RANGE(p->queue, 0, culled); // Keep adding new frames until we find one in the future, or EOF while (p->queue.num < 2) { enum pl_queue_status ret; switch ((ret = get_frame(p, params))) { case PL_QUEUE_ERR: return ret; case PL_QUEUE_EOF: if (!p->queue.num) return ret; goto done; case PL_QUEUE_MORE: case PL_QUEUE_OK: while (p->queue.num > 1 && p->queue.elem[1]->src.pts <= pts) { cull_entry(p, p->queue.elem[0]); PL_ARRAY_REMOVE_AT(p->queue, 0); } if (ret == PL_QUEUE_MORE) return ret; continue; } } done: if (p->eof && p->queue.num == 1) { if (p->queue.elem[0]->src.pts == 0.0 || !p->fps.estimate) { // If the last frame has PTS 0.0, or we have no FPS estimate, then // this is probably a single-frame file, in which case we want to // extend the ZOH to infinity, rather than returning. Not a perfect // heuristic, but w/e return PL_QUEUE_OK; } // Last frame is held for an extra `p->fps.estimate` duration, // afterwards this function just returns EOF. 
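        // Worked example: for a 25 fps source (fps.estimate == 0.04) whose
        // last frame has PTS 10.00, any request up to PTS 10.04 keeps
        // returning that frame under ZOH semantics; later requests cull it
        // and report PL_QUEUE_EOF.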
if (p->queue.elem[0]->src.pts + p->fps.estimate < pts) { cull_entry(p, p->queue.elem[0]); p->queue.num = 0; return PL_QUEUE_EOF; } } pl_assert(p->queue.num); return PL_QUEUE_OK; } static inline enum pl_queue_status point(pl_queue p, struct pl_frame_mix *mix, const struct pl_queue_params *params) { // Find closest frame (nearest neighbour semantics) pl_assert(p->queue.num); struct entry *entry = p->queue.elem[0]; double best = fabs(entry->src.pts - params->pts); for (int i = 1; i < p->queue.num; i++) { double dist = fabs(p->queue.elem[i]->src.pts - params->pts); if (dist < best) { entry = p->queue.elem[i]; best = dist; continue; } else { break; } } if (!map_frame(p, entry)) return PL_QUEUE_ERR; // Return a mix containing only this single frame p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0; PL_ARRAY_APPEND(p, p->tmp_sig, entry->signature); PL_ARRAY_APPEND(p, p->tmp_frame, &entry->frame); PL_ARRAY_APPEND(p, p->tmp_ts, 0.0); *mix = (struct pl_frame_mix) { .num_frames = 1, .frames = p->tmp_frame.elem, .signatures = p->tmp_sig.elem, .timestamps = p->tmp_ts.elem, .vsync_duration = 1.0, }; PL_TRACE(p, "Showing single frame id %"PRIu64" with PTS %f for target PTS %f", entry->signature, entry->src.pts, params->pts); report_estimates(p); return PL_QUEUE_OK; } // Present a single frame as appropriate for `pts` static enum pl_queue_status nearest(pl_queue p, struct pl_frame_mix *mix, const struct pl_queue_params *params) { enum pl_queue_status ret; switch ((ret = advance(p, params->pts, params))) { case PL_QUEUE_ERR: case PL_QUEUE_EOF: return ret; case PL_QUEUE_OK: break; case PL_QUEUE_MORE: if (!p->queue.num) { if (mix) *mix = (struct pl_frame_mix) {0}; return ret; } break; } if (!mix) return PL_QUEUE_OK; return point(p, mix, params); } // Special case of `interpolate` for radius = 0, in which case we need exactly // the previous frame and the following frame static enum pl_queue_status oversample(pl_queue p, struct pl_frame_mix *mix, const struct pl_queue_params *params) { enum pl_queue_status ret; switch ((ret = advance(p, params->pts, params))) { case PL_QUEUE_ERR: case PL_QUEUE_EOF: return ret; case PL_QUEUE_OK: break; case PL_QUEUE_MORE: if (!p->queue.num) { if (mix) *mix = (struct pl_frame_mix) {0}; return ret; } break; } if (!mix) return PL_QUEUE_OK; // Can't oversample with only a single frame, fall back to point sampling if (p->queue.num < 2 || p->queue.elem[0]->src.pts > params->pts) { if (point(p, mix, params) != PL_QUEUE_OK) return PL_QUEUE_ERR; return ret; } struct entry *entries[2] = { p->queue.elem[0], p->queue.elem[1] }; pl_assert(entries[0]->src.pts <= params->pts); pl_assert(entries[1]->src.pts >= params->pts); // Returning a mix containing both of these two frames p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0; for (int i = 0; i < 2; i++) { if (!map_frame(p, entries[i])) return PL_QUEUE_ERR; float ts = (entries[i]->src.pts - params->pts) / p->fps.estimate; PL_ARRAY_APPEND(p, p->tmp_sig, entries[i]->signature); PL_ARRAY_APPEND(p, p->tmp_frame, &entries[i]->frame); PL_ARRAY_APPEND(p, p->tmp_ts, ts); } *mix = (struct pl_frame_mix) { .num_frames = 2, .frames = p->tmp_frame.elem, .signatures = p->tmp_sig.elem, .timestamps = p->tmp_ts.elem, .vsync_duration = p->vps.estimate / p->fps.estimate, }; PL_TRACE(p, "Oversampling 2 frames for target PTS %f:", params->pts); for (int i = 0; i < mix->num_frames; i++) PL_TRACE(p, " id %"PRIu64" ts %f", mix->signatures[i], mix->timestamps[i]); report_estimates(p); return ret; } // Present a mixture of frames, relative to the vsync ratio 
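//
// The timestamps in the resulting pl_frame_mix are expressed in units of the
// estimated source frame duration, relative to the target PTS (this is the
// `(src.pts - params->pts) / fps.estimate` computation below), and
// `vsync_duration` is the display refresh interval in that same unit. For
// example, a frame lying 10 ms past the target PTS of a 25 fps (40 ms)
// source gets a timestamp of +0.25.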
static enum pl_queue_status interpolate(pl_queue p, struct pl_frame_mix *mix, const struct pl_queue_params *params) { // No FPS estimate available, possibly source contains only a single frame, // or this is the first frame to be rendered. Fall back to point sampling. if (!p->fps.estimate) return nearest(p, mix, params); // Silently disable interpolation if the ratio dips lower than the // configured threshold float ratio = fabs(p->fps.estimate / p->vps.estimate - 1.0); if (ratio < params->interpolation_threshold) { if (!p->threshold_frames) { PL_INFO(p, "Detected fps ratio %.4f below threshold %.4f, " "disabling interpolation", ratio, params->interpolation_threshold); } p->threshold_frames = THRESHOLD_FRAMES + 1; return nearest(p, mix, params); } else if (ratio < THRESHOLD_MAX_RATIO && p->threshold_frames > 1) { p->threshold_frames--; return nearest(p, mix, params); } else { if (p->threshold_frames) { PL_INFO(p, "Detected fps ratio %.4f exceeds threshold %.4f, " "re-enabling interpolation", ratio, params->interpolation_threshold); } p->threshold_frames = 0; } // No radius information, special case in which we only need the previous // and next frames. if (!params->radius) return oversample(p, mix, params); float min_pts = params->pts - params->radius * p->fps.estimate, max_pts = params->pts + params->radius * p->fps.estimate; enum pl_queue_status ret; switch ((ret = advance(p, min_pts, params))) { case PL_QUEUE_ERR: case PL_QUEUE_EOF: return ret; case PL_QUEUE_MORE: goto done; case PL_QUEUE_OK: break; } // Keep adding new frames until we've covered the range we care about pl_assert(p->queue.num); while (p->queue.elem[p->queue.num - 1]->src.pts < max_pts) { switch ((ret = get_frame(p, params))) { case PL_QUEUE_ERR: return ret; case PL_QUEUE_MORE: case PL_QUEUE_EOF: goto done; case PL_QUEUE_OK: continue; } } done: ; if (!mix) return PL_QUEUE_OK; // Construct a mix object representing the current queue state, starting at // the last frame before `min_pts` to make sure there's a fallback frame // available for ZOH semantics. p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0; for (int i = 0; i < p->queue.num; i++) { struct entry *entry = p->queue.elem[i]; if (entry->src.pts > max_pts) break; if (!map_frame(p, entry)) return PL_QUEUE_ERR; float ts = (entry->src.pts - params->pts) / p->fps.estimate; PL_ARRAY_APPEND(p, p->tmp_sig, entry->signature); PL_ARRAY_APPEND(p, p->tmp_frame, &entry->frame); PL_ARRAY_APPEND(p, p->tmp_ts, ts); } *mix = (struct pl_frame_mix) { .num_frames = p->tmp_frame.num, .frames = p->tmp_frame.elem, .signatures = p->tmp_sig.elem, .timestamps = p->tmp_ts.elem, .vsync_duration = p->vps.estimate / p->fps.estimate, }; PL_TRACE(p, "Showing mix of %d frames for target PTS %f:", mix->num_frames, params->pts); for (int i = 0; i < mix->num_frames; i++) PL_TRACE(p, " id %"PRIu64" ts %f", mix->signatures[i], mix->timestamps[i]); report_estimates(p); return ret; } static bool prefill(pl_queue p, const struct pl_queue_params *params) { int min_frames = 2 * ceilf(params->radius); min_frames = PL_MAX(min_frames, PREFETCH_FRAMES); while (p->queue.num < min_frames) { switch (get_frame(p, params)) { case PL_QUEUE_ERR: return false; case PL_QUEUE_EOF: case PL_QUEUE_MORE: return true; case PL_QUEUE_OK: continue; } } // In the most likely case, the first few frames will all be required. So // force-map them all to initialize GPU state on initial rendering. This is // better than the alternative of missing the cache later, when timing is // more relevant. 
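    // (min_frames covers the full mixing window of 2 * ceil(radius) frames,
    // clamped to at least PREFETCH_FRAMES, per the computation above.)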
for (int i = 0; i < min_frames; i++) { if (!map_frame(p, p->queue.elem[i])) return false; } return true; } static inline void default_estimate(struct pool *pool, float val) { if (!pool->estimate && isnormal(val) && val > 0.0) pool->estimate = val; } enum pl_queue_status pl_queue_update(pl_queue p, struct pl_frame_mix *out_mix, const struct pl_queue_params *params) { pl_mutex_lock(&p->lock_strong); pl_mutex_lock(&p->lock_weak); default_estimate(&p->fps, params->frame_duration); default_estimate(&p->vps, params->vsync_duration); float delta = params->pts - p->prev_pts; if (delta < 0.0) { // This is a backwards PTS jump. This is something we can handle // semi-gracefully, but only if we haven't culled past the current // frame yet. if (p->queue.num && p->queue.elem[0]->src.pts > params->pts) { PL_ERR(p, "Requested PTS %f is lower than the oldest frame " "PTS %f. This is not supported, PTS must be monotonically " "increasing! Please use `pl_queue_reset` to reset the frame " "queue on discontinuous PTS jumps.", params->pts, p->queue.elem[0]->src.pts); pl_mutex_unlock(&p->lock_weak); pl_mutex_unlock(&p->lock_strong); return PL_QUEUE_ERR; } } else if (delta > 1.0) { // A jump of more than a second is probably the result of a // discontinuous jump after a suspend. To prevent this from exploding // the FPS estimate, treat this as a new frame. PL_TRACE(p, "Discontinuous target PTS jump %f -> %f, ignoring...", p->prev_pts, params->pts); } else if (delta > 0) { update_estimate(&p->vps, params->pts - p->prev_pts); } p->prev_pts = params->pts; // As a special case, prefill the queue if this is the first frame if (!params->pts && !p->queue.num) { if (!prefill(p, params)) { pl_mutex_unlock(&p->lock_weak); pl_mutex_unlock(&p->lock_strong); return PL_QUEUE_ERR; } } // Ignore unrealistically high or low FPS, common near start of playback static const float max_vsync = 1.0 / MIN_FPS; static const float min_vsync = 1.0 / MAX_FPS; enum pl_queue_status ret; if (p->vps.estimate > min_vsync && p->vps.estimate < max_vsync) { // We know the vsync duration, so construct an interpolation mix ret = interpolate(p, out_mix, params); } else { // We don't know the vsync duration (yet), so just point-sample ret = nearest(p, out_mix, params); } pl_cond_signal(&p->wakeup); pl_mutex_unlock(&p->lock_weak); pl_mutex_unlock(&p->lock_strong); return ret; } libplacebo-v4.192.1/src/utils/upload.c000066400000000000000000000230121417677245700175530ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "log.h" #include "common.h" #include "gpu.h" #define MAX_COMPS 4 struct comp { int order; // e.g. 
0, 1, 2, 3 for RGBA int size; // size in bits int shift; // bit-shift / offset in bits }; static int compare_comp(const void *pa, const void *pb) { const struct comp *a = pa, *b = pb; // Move all of the components with a size of 0 to the end, so they can // be ignored outright if (a->size && !b->size) return -1; if (b->size && !a->size) return 1; // Otherwise, just compare based on the shift return PL_CMP(a->shift, b->shift); } void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4]) { struct comp comps[MAX_COMPS] = { {0}, {1}, {2}, {3} }; for (int i = 0; i < PL_ARRAY_SIZE(comps); i++) { comps[i].size = __builtin_popcountll(mask[i]); comps[i].shift = PL_MAX(0, __builtin_ffsll(mask[i]) - 1); // Sanity checking uint64_t mask_reconstructed = (1LLU << comps[i].size) - 1; mask_reconstructed <<= comps[i].shift; pl_assert(mask_reconstructed == mask[i]); } // Sort the components by shift qsort(comps, MAX_COMPS, sizeof(struct comp), compare_comp); // Generate the resulting component size/pad/map int offset = 0; for (int i = 0; i < MAX_COMPS; i++) { if (comps[i].size) { assert(comps[i].shift >= offset); data->component_size[i] = comps[i].size; data->component_pad[i] = comps[i].shift - offset; data->component_map[i] = comps[i].order; offset += data->component_size[i] + data->component_pad[i]; } else { // Clear the superfluous entries for sanity data->component_size[i] = 0; data->component_pad[i] = 0; data->component_map[i] = 0; } } } bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits) { struct pl_plane_data aligned = *data; struct pl_bit_encoding bits = {0}; int offset = 0; #define SET_TEST(var, value) \ do { \ if (offset == 0) { \ (var) = (value); \ } else if ((var) != (value)) { \ goto misaligned; \ } \ } while (0) for (int i = 0; i < MAX_COMPS; i++) { if (!aligned.component_size[i]) break; // Can't meaningfully align alpha channel, so just skip it. This is a // limitation of the fact that `pl_bit_encoding` only applies to the // main color channels, and changing this would be very nontrivial. if (aligned.component_map[i] == PL_CHANNEL_A) continue; // Color depth is the original component size, before alignment SET_TEST(bits.color_depth, aligned.component_size[i]); // Try consuming padding of the current component to align down. This // corresponds to an extra bit shift to the left. int comp_start = offset + aligned.component_pad[i]; int left_delta = comp_start - PL_ALIGN2(comp_start - 7, 8); left_delta = PL_MIN(left_delta, aligned.component_pad[i]); aligned.component_pad[i] -= left_delta; aligned.component_size[i] += left_delta; SET_TEST(bits.bit_shift, left_delta); // Try consuming padding of the next component to align up. This // corresponds to simply ignoring some extra 0s on the end. 
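        // (Illustrative example: a lone 10-bit component occupying the low
        // bits of a 16-bit word ends up with color_depth = 10, bit_shift = 0
        // and sample_depth = 16, since the last component may greedily absorb
        // the 6 unused high bits in the step below.)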
int comp_end = comp_start + aligned.component_size[i] - left_delta; int right_delta = PL_ALIGN2(comp_end, 8) - comp_end; if (i+1 == MAX_COMPS || !aligned.component_size[i+1]) { // This is the last component, so we can be greedy aligned.component_size[i] += right_delta; } else { right_delta = PL_MIN(right_delta, aligned.component_pad[i+1]); aligned.component_pad[i+1] -= right_delta; aligned.component_size[i] += right_delta; } // Sample depth is the new total component size, including padding SET_TEST(bits.sample_depth, aligned.component_size[i]); offset += aligned.component_pad[i] + aligned.component_size[i]; } // Easy sanity check, to make sure that we don't exceed the known stride if (aligned.pixel_stride && offset > aligned.pixel_stride * 8) goto misaligned; *data = aligned; if (out_bits) *out_bits = bits; return true; misaligned: // Can't properly align anything, so just do a no-op if (out_bits) *out_bits = (struct pl_bit_encoding) {0}; return false; } pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data) { int dummy[4] = {0}; out_map = PL_DEF(out_map, dummy); // Count the number of components and initialize out_map int num = 0; for (int i = 0; i < PL_ARRAY_SIZE(data->component_size); i++) { out_map[i] = -1; if (data->component_size[i]) num = i+1; } for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; if (fmt->opaque || fmt->num_components < num) continue; if (fmt->type != data->type || fmt->texel_size != data->pixel_stride) continue; if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE)) continue; if (data->row_stride % fmt->texel_align) continue; // reject misaligned formats int idx = 0; // Try mapping all pl_plane_data components to texture components for (int i = 0; i < num; i++) { // If there's padding we have to map it to an unused physical // component first int pad = data->component_pad[i]; if (pad && (idx >= 4 || fmt->host_bits[idx++] != pad)) goto next_fmt; // Otherwise, try and match this component int size = data->component_size[i]; if (size && (idx >= 4 || fmt->host_bits[idx] != size)) goto next_fmt; out_map[idx++] = data->component_map[i]; } return fmt; next_fmt: ; // acts as `continue` } return NULL; } bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane, pl_tex *tex, const struct pl_plane_data *data) { pl_assert(!data->buf ^ !data->pixels); // exactly one if (data->buf) { pl_assert(data->buf_offset == PL_ALIGN2(data->buf_offset, 4)); pl_assert(data->buf_offset == PL_ALIGN(data->buf_offset, data->pixel_stride)); } int out_map[4]; pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data); if (!fmt) { PL_ERR(gpu, "Failed picking any compatible texture format for a plane!"); return false; // TODO: try soft-converting to a supported format using e.g zimg? 
} bool ok = pl_tex_recreate(gpu, tex, pl_tex_params( .w = data->width, .h = data->height, .format = fmt, .sampleable = true, .host_writable = true, .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE, )); if (!ok) { PL_ERR(gpu, "Failed initializing plane texture!"); return false; } if (out_plane) { out_plane->texture = *tex; out_plane->components = 0; for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) { out_plane->component_mapping[i] = out_map[i]; if (out_map[i] >= 0) out_plane->components = i+1; } } return pl_tex_upload(gpu, pl_tex_transfer_params( .tex = *tex, .row_pitch = data->row_stride, .ptr = (void *) data->pixels, .buf = data->buf, .buf_offset = data->buf_offset, .callback = data->callback, .priv = data->priv, )); } bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane, pl_tex *tex, const struct pl_plane_data *data) { int out_map[4]; pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data); if (!fmt) { PL_ERR(gpu, "Failed picking any compatible texture format for a plane!"); return false; } bool ok = pl_tex_recreate(gpu, tex, pl_tex_params( .w = data->width, .h = data->height, .format = fmt, .renderable = true, .host_readable = fmt->caps & PL_FMT_CAP_HOST_READABLE, .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE, .storable = fmt->caps & PL_FMT_CAP_STORABLE, )); if (!ok) { PL_ERR(gpu, "Failed initializing plane texture!"); return false; } if (out_plane) { out_plane->texture = *tex; out_plane->components = 0; for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) { out_plane->component_mapping[i] = out_map[i]; if (out_map[i] >= 0) out_plane->components = i+1; } } return true; } libplacebo-v4.192.1/src/version.h.in000066400000000000000000000000431417677245700172250ustar00rootroot00000000000000#define BUILD_VERSION "@buildver@" libplacebo-v4.192.1/src/vulkan/000077500000000000000000000000001417677245700162655ustar00rootroot00000000000000libplacebo-v4.192.1/src/vulkan/command.c000066400000000000000000000332761417677245700200620ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "command.h" #include "utils.h" // returns VK_SUCCESS (completed), VK_TIMEOUT (not yet completed) or an error static VkResult vk_cmd_poll(struct vk_ctx *vk, struct vk_cmd *cmd, uint64_t timeout) { return vk->WaitForFences(vk->dev, 1, &cmd->fence, false, timeout); } static void flush_callbacks(struct vk_ctx *vk) { while (vk->num_pending_callbacks) { const struct vk_callback *cb = vk->pending_callbacks++; vk->num_pending_callbacks--; cb->run(cb->priv, cb->arg); } } static void vk_cmd_reset(struct vk_ctx *vk, struct vk_cmd *cmd) { // Flush possible callbacks left over from a previous command still in the // process of being reset, whose callback triggered this command being // reset. 
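    // (Publishing this command's callback list via `vk->pending_callbacks`
    // below, rather than iterating it directly, keeps the drain order FIFO
    // even if one of the callbacks recursively polls for further commands.)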
flush_callbacks(vk); vk->pending_callbacks = cmd->callbacks.elem; vk->num_pending_callbacks = cmd->callbacks.num; flush_callbacks(vk); cmd->callbacks.num = 0; cmd->deps.num = 0; cmd->depstages.num = 0; cmd->depvalues.num = 0; cmd->sigs.num = 0; cmd->sigvalues.num = 0; } static void vk_cmd_destroy(struct vk_ctx *vk, struct vk_cmd *cmd) { if (!cmd) return; vk_cmd_poll(vk, cmd, UINT64_MAX); vk_cmd_reset(vk, cmd); vk->DestroyFence(vk->dev, cmd->fence, PL_VK_ALLOC); vk->FreeCommandBuffers(vk->dev, cmd->pool->pool, 1, &cmd->buf); pl_free(cmd); } static struct vk_cmd *vk_cmd_create(struct vk_ctx *vk, struct vk_cmdpool *pool) { struct vk_cmd *cmd = pl_zalloc_ptr(NULL, cmd); cmd->pool = pool; VkCommandBufferAllocateInfo ainfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = pool->pool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = 1, }; VK(vk->AllocateCommandBuffers(vk->dev, &ainfo, &cmd->buf)); VkFenceCreateInfo finfo = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .flags = VK_FENCE_CREATE_SIGNALED_BIT, }; VK(vk->CreateFence(vk->dev, &finfo, PL_VK_ALLOC, &cmd->fence)); PL_VK_NAME(FENCE, cmd->fence, "cmd"); return cmd; error: vk_cmd_destroy(vk, cmd); vk->failed = true; return NULL; } void vk_dev_callback(struct vk_ctx *vk, vk_cb callback, const void *priv, const void *arg) { pl_mutex_lock(&vk->lock); if (vk->cmds_pending.num > 0) { struct vk_cmd *last_cmd = vk->cmds_pending.elem[vk->cmds_pending.num - 1]; vk_cmd_callback(last_cmd, callback, priv, arg); } else { // The device was already idle, so we can just immediately call it callback((void *) priv, (void *) arg); } pl_mutex_unlock(&vk->lock); } void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, const void *priv, const void *arg) { PL_ARRAY_APPEND(cmd, cmd->callbacks, (struct vk_callback) { .run = callback, .priv = (void *) priv, .arg = (void *) arg, }); } void vk_cmd_dep(struct vk_cmd *cmd, VkPipelineStageFlags stage, pl_vulkan_sem dep) { assert(cmd->deps.num == cmd->depstages.num); assert(cmd->deps.num == cmd->depvalues.num); PL_ARRAY_APPEND(cmd, cmd->deps, dep.sem); PL_ARRAY_APPEND(cmd, cmd->depvalues, dep.value); PL_ARRAY_APPEND(cmd, cmd->depstages, stage); } void vk_cmd_sig(struct vk_cmd *cmd, pl_vulkan_sem sig) { assert(cmd->sigs.num == cmd->sigvalues.num); PL_ARRAY_APPEND(cmd, cmd->sigs, sig.sem); PL_ARRAY_APPEND(cmd, cmd->sigvalues, sig.value); } void vk_sem_uninit(struct vk_ctx *vk, struct vk_sem *sem) { vk->DestroySemaphore(vk->dev, sem->semaphore, PL_VK_ALLOC); *sem = (struct vk_sem) {0}; } bool vk_sem_init(struct vk_ctx *vk, struct vk_sem *sem, pl_debug_tag debug_tag) { *sem = (struct vk_sem) { .write.stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, .read.stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, }; static const VkSemaphoreTypeCreateInfo stinfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, .initialValue = 0, }; static const VkSemaphoreCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, .pNext = &stinfo, }; // We always create a semaphore, so we can perform host waits on it VK(vk->CreateSemaphore(vk->dev, &sinfo, PL_VK_ALLOC, &sem->semaphore)); PL_VK_NAME(SEMAPHORE, sem->semaphore, PL_DEF(debug_tag, "vk_sem")); return true; error: vk->failed = true; return false; } struct vk_sync_scope vk_sem_barrier(struct vk_ctx *vk, struct vk_cmd *cmd, struct vk_sem *sem, VkPipelineStageFlags stage, VkAccessFlags access, bool is_trans) { bool is_write = (access & vk_access_write) || is_trans; // Writes need to be synchronized 
against the last *read* (which is // transitively synchronized against the last write), reads only // need to be synchronized against the last write. struct vk_sync_scope last = sem->write; if (is_write && sem->read.access) last = sem->read; if (last.queue != cmd->queue) { if (!is_write && sem->read.queue == cmd->queue) { // No semaphore needed in this case because the implicit submission // order execution dependencies already transitively imply a wait // for the previous write } else if (last.queue) { vk_cmd_dep(cmd, stage, (pl_vulkan_sem) { .sem = sem->semaphore, .value = last.value, }); } last.stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; last.access = 0; } if (!is_write && sem->read.queue == cmd->queue && (sem->read.stage & stage) == stage && (sem->read.access & access) == access) { // A past pipeline barrier already covers this access transitively, so // we don't need to emit another pipeline barrier at all last.stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; last.access = 0; } pl_assert(sem->read.value >= sem->write.value); uint64_t next_value = sem->read.value + 1; vk_cmd_sig(cmd, (pl_vulkan_sem) { .sem = sem->semaphore, .value = next_value, }); if (is_write) { sem->write = (struct vk_sync_scope) { .value = next_value, .queue = cmd->queue, .stage = stage, .access = access, }; sem->read = (struct vk_sync_scope) { .value = next_value, .queue = cmd->queue, // no stage or access scope, because no reads happened yet }; } else if (sem->read.queue == cmd->queue) { // Coalesce multiple same-queue reads into a single access scope sem->read.value = next_value; sem->read.stage |= stage; sem->read.access |= access; } else { sem->read = (struct vk_sync_scope) { .value = next_value, .queue = cmd->queue, .stage = stage, .access = access, }; } return last; } struct vk_cmdpool *vk_cmdpool_create(struct vk_ctx *vk, VkDeviceQueueCreateInfo qinfo, VkQueueFamilyProperties props) { struct vk_cmdpool *pool = pl_alloc_ptr(NULL, pool); *pool = (struct vk_cmdpool) { .props = props, .qf = qinfo.queueFamilyIndex, .queues = pl_calloc(pool, qinfo.queueCount, sizeof(VkQueue)), .num_queues = qinfo.queueCount, }; for (int n = 0; n < pool->num_queues; n++) vk->GetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]); VkCommandPoolCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, .queueFamilyIndex = pool->qf, }; VK(vk->CreateCommandPool(vk->dev, &cinfo, PL_VK_ALLOC, &pool->pool)); return pool; error: vk_cmdpool_destroy(vk, pool); vk->failed = true; return NULL; } void vk_cmdpool_destroy(struct vk_ctx *vk, struct vk_cmdpool *pool) { if (!pool) return; for (int i = 0; i < pool->cmds.num; i++) vk_cmd_destroy(vk, pool->cmds.elem[i]); vk->DestroyCommandPool(vk->dev, pool->pool, PL_VK_ALLOC); pl_free(pool); } struct vk_cmd *vk_cmd_begin(struct vk_ctx *vk, struct vk_cmdpool *pool) { // Garbage collect the cmdpool first, to increase the chances of getting // an already-available command buffer. 
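    // (A timeout of 0 makes this a purely non-blocking poll; see the
    // documentation of `vk_poll_commands` in command.h.)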
vk_poll_commands(vk, 0); struct vk_cmd *cmd = NULL; pl_mutex_lock(&vk->lock); if (!PL_ARRAY_POP(pool->cmds, &cmd)) { cmd = vk_cmd_create(vk, pool); if (!cmd) { pl_mutex_unlock(&vk->lock); goto error; } } cmd->queue = pool->queues[pool->idx_queues]; pl_mutex_unlock(&vk->lock); VkCommandBufferBeginInfo binfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; VK(vk->BeginCommandBuffer(cmd->buf, &binfo)); return cmd; error: // Something has to be seriously messed up if we get to this point vk_cmd_destroy(vk, cmd); vk->failed = true; return NULL; } bool vk_cmd_submit(struct vk_ctx *vk, struct vk_cmd **pcmd) { struct vk_cmd *cmd = *pcmd; if (!cmd) return true; *pcmd = NULL; struct vk_cmdpool *pool = cmd->pool; VK(vk->EndCommandBuffer(cmd->buf)); VK(vk->ResetFences(vk->dev, 1, &cmd->fence)); VkTimelineSemaphoreSubmitInfo tinfo = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .waitSemaphoreValueCount = cmd->depvalues.num, .pWaitSemaphoreValues = cmd->depvalues.elem, .signalSemaphoreValueCount = cmd->sigvalues.num, .pSignalSemaphoreValues = cmd->sigvalues.elem, }; VkSubmitInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &tinfo, .commandBufferCount = 1, .pCommandBuffers = &cmd->buf, .waitSemaphoreCount = cmd->deps.num, .pWaitSemaphores = cmd->deps.elem, .pWaitDstStageMask = cmd->depstages.elem, .signalSemaphoreCount = cmd->sigs.num, .pSignalSemaphores = cmd->sigs.elem, }; if (pl_msg_test(vk->log, PL_LOG_TRACE)) { PL_TRACE(vk, "Submitting command on queue %p (QF %d):", (void *)cmd->queue, pool->qf); for (int n = 0; n < cmd->deps.num; n++) { PL_TRACE(vk, " waits on semaphore %p = %"PRIu64, (void *) cmd->deps.elem[n], cmd->depvalues.elem[n]); } for (int n = 0; n < cmd->sigs.num; n++) { PL_TRACE(vk, " signals semaphore %p = %"PRIu64, (void *) cmd->sigs.elem[n], cmd->sigvalues.elem[n]); } PL_TRACE(vk, " signals fence %p", (void *) cmd->fence); if (cmd->callbacks.num) PL_TRACE(vk, " signals %d callbacks", cmd->callbacks.num); } VK(vk->QueueSubmit(cmd->queue, 1, &sinfo, cmd->fence)); pl_mutex_lock(&vk->lock); PL_ARRAY_APPEND(vk->alloc, vk->cmds_pending, cmd); pl_mutex_unlock(&vk->lock); return true; error: vk_cmd_reset(vk, cmd); pl_mutex_lock(&vk->lock); PL_ARRAY_APPEND(pool, pool->cmds, cmd); pl_mutex_unlock(&vk->lock); vk->failed = true; return false; } bool vk_poll_commands(struct vk_ctx *vk, uint64_t timeout) { bool ret = false; pl_mutex_lock(&vk->lock); while (vk->cmds_pending.num) { struct vk_cmd *cmd = vk->cmds_pending.elem[0]; struct vk_cmdpool *pool = cmd->pool; pl_mutex_unlock(&vk->lock); // don't hold mutex while blocking if (vk_cmd_poll(vk, cmd, timeout) == VK_TIMEOUT) return ret; pl_mutex_lock(&vk->lock); if (!vk->cmds_pending.num || vk->cmds_pending.elem[0] != cmd) continue; // another thread modified this state while blocking PL_TRACE(vk, "VkFence signalled: %p", (void *) cmd->fence); PL_ARRAY_REMOVE_AT(vk->cmds_pending, 0); // remove before callbacks vk_cmd_reset(vk, cmd); PL_ARRAY_APPEND(pool, pool->cmds, cmd); ret = true; // If we've successfully spent some time waiting for at least one // command, disable the timeout. This has the dual purpose of both // making sure we don't over-wait due to repeat timeout application, // but also makes sure we don't block on future commands if we've // already spend time waiting for one. 
timeout = 0; } pl_mutex_unlock(&vk->lock); return ret; } void vk_rotate_queues(struct vk_ctx *vk) { pl_mutex_lock(&vk->lock); // Rotate the queues to ensure good parallelism across frames for (int i = 0; i < vk->pools.num; i++) { struct vk_cmdpool *pool = vk->pools.elem[i]; pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues; PL_TRACE(vk, "QF %d: %d/%d", pool->qf, pool->idx_queues, pool->num_queues); } pl_mutex_unlock(&vk->lock); } void vk_wait_idle(struct vk_ctx *vk) { while (vk_poll_commands(vk, UINT64_MAX)) ; } libplacebo-v4.192.1/src/vulkan/command.h000066400000000000000000000146601417677245700200630ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // Since lots of vulkan operations need to be done lazily once the affected // resources are no longer in use, provide an abstraction for tracking these. // In practice, these are only checked and run when submitting new commands, so // the actual execution may be delayed by a frame. typedef void (*vk_cb)(void *p, void *arg); struct vk_callback { vk_cb run; void *priv; void *arg; }; // Associate a callback with the completion of all currently pending commands. // This will essentially run once the device is completely idle. void vk_dev_callback(struct vk_ctx *vk, vk_cb callback, const void *priv, const void *arg); // Helper wrapper around command buffers that also track dependencies, // callbacks and synchronization primitives // // Thread-safety: Unsafe struct vk_cmd { struct vk_cmdpool *pool; // pool it was allocated from VkQueue queue; // the submission queue (for recording/pending) VkCommandBuffer buf; // the command buffer itself VkFence fence; // the fence guards cmd buffer reuse // The semaphores represent dependencies that need to complete before // this command can be executed. These are *not* owned by the vk_cmd PL_ARRAY(VkSemaphore) deps; PL_ARRAY(VkPipelineStageFlags) depstages; PL_ARRAY(uint64_t) depvalues; // The signals represent semaphores that fire once the command finishes // executing. These are also not owned by the vk_cmd PL_ARRAY(VkSemaphore) sigs; PL_ARRAY(uint64_t) sigvalues; // Since VkFences are useless, we have to manually track "callbacks" // to fire once the VkFence completes. These are used for multiple purposes, // ranging from garbage collection (resource deallocation) to fencing. PL_ARRAY(struct vk_callback) callbacks; }; // Associate a callback with the completion of the current command. This // function will be run once the command completes, or shortly thereafter. void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, const void *priv, const void *arg); // Associate a raw dependency for the current command. This semaphore must // signal by the corresponding stage before the command may execute. void vk_cmd_dep(struct vk_cmd *cmd, VkPipelineStageFlags stage, pl_vulkan_sem dep); // Associate a raw signal with the current command. 
This semaphore will signal // after the command completes. void vk_cmd_sig(struct vk_cmd *cmd, pl_vulkan_sem sig); // Synchronization scope struct vk_sync_scope { uint64_t value; // last timeline semaphore value VkQueue queue; // source queue of last access VkPipelineStageFlags stage; // stage bitmask of last access VkAccessFlags access; // access type bitmask }; // Synchronization primitive struct vk_sem { // timeline semaphores, together with a pair of structs respectively // describing the last read and write access, separately VkSemaphore semaphore; struct vk_sync_scope read, write; }; bool vk_sem_init(struct vk_ctx *vk, struct vk_sem *sem, pl_debug_tag debug_tag); void vk_sem_uninit(struct vk_ctx *vk, struct vk_sem *sem); // Updates the `vk_sem` state for a given access. If `is_trans` is set, this // access is treated as a write (since it alters the resource's state). // // Returns a struct describing the previous access to a resource. A pipeline // barrier is only required if the previous access scope is nonzero. struct vk_sync_scope vk_sem_barrier(struct vk_ctx *vk, struct vk_cmd *cmd, struct vk_sem *sem, VkPipelineStageFlags stage, VkAccessFlags access, bool is_trans); // Command pool / queue family hybrid abstraction struct vk_cmdpool { VkQueueFamilyProperties props; int qf; // queue family index VkCommandPool pool; VkQueue *queues; int num_queues; int idx_queues; // Command buffers associated with this queue. These are available for // re-recording PL_ARRAY(struct vk_cmd *) cmds; }; // Set up a vk_cmdpool corresponding to a queue family. struct vk_cmdpool *vk_cmdpool_create(struct vk_ctx *vk, VkDeviceQueueCreateInfo qinfo, VkQueueFamilyProperties props); void vk_cmdpool_destroy(struct vk_ctx *vk, struct vk_cmdpool *pool); // Fetch a command buffer from a command pool and begin recording to it. // Returns NULL on failure. struct vk_cmd *vk_cmd_begin(struct vk_ctx *vk, struct vk_cmdpool *pool); // Finish recording a command buffer and submit it for execution. This function // takes over ownership of **cmd, and sets *cmd to NULL in doing so. bool vk_cmd_submit(struct vk_ctx *vk, struct vk_cmd **cmd); // Block until some commands complete executing. This is the only function that // actually processes the callbacks. Will wait at most `timeout` nanoseconds // for the completion of any command. The timeout may also be passed as 0, in // which case this function will not block, but only poll for completed // commands. Returns whether any forward progress was made. // // This does *not* flush any queued commands, forgetting to do so may result // in infinite loops if waiting for the completion of callbacks that were // never flushed! bool vk_poll_commands(struct vk_ctx *vk, uint64_t timeout); // Rotate through queues in each command pool. Call this once per frame, after // submitting all of the command buffers for that frame. Calling this more // often than that is possible but bad for performance. void vk_rotate_queues(struct vk_ctx *vk); // Wait until all commands are complete, i.e. the device is idle. This is // basically equivalent to calling `vk_poll_commands` with a timeout of // UINT64_MAX until it returns `false`. void vk_wait_idle(struct vk_ctx *vk); libplacebo-v4.192.1/src/vulkan/common.h000066400000000000000000000172531417677245700177360ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #define VK_NO_PROTOTYPES #define VK_ENABLE_BETA_EXTENSIONS // for VK_KHR_portability_subset #include "../common.h" #include "../log.h" #include "../pl_thread.h" #ifdef PL_HAVE_WIN32 #include #include #endif // Vulkan allows the optional use of a custom allocator. We don't need one but // mark this parameter with a better name in case we ever decide to change this // in the future. (And to make the code more readable) #define PL_VK_ALLOC NULL // Type of a vulkan function that needs to be loaded #define PL_VK_FUN(name) PFN_vk##name name // Load a vulkan instance-level extension function directly (on the stack) #define PL_VK_LOAD_FUN(inst, name, get_addr) \ PL_VK_FUN(name) = (PFN_vk##name) get_addr(inst, "vk" #name); // Hard-coded limit on the number of pending commands, to avoid OOM loops #define PL_VK_MAX_PENDING_CMDS 1024 // Shitty compatibility alias for very old vulkan.h versions #ifndef VK_API_VERSION_1_2 #define VK_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, 0) #endif // Shared struct used to hold vulkan context information struct vk_ctx { pl_mutex lock; pl_vulkan vulkan; void *alloc; // host allocations bound to the lifetime of this vk_ctx struct vk_malloc *ma; // VRAM malloc layer pl_vk_inst internal_instance; pl_log log; VkInstance inst; VkPhysicalDevice physd; VkPhysicalDeviceLimits limits; VkPhysicalDeviceFeatures2 features; uint32_t api_ver; // device API version VkDevice dev; bool imported; // device was not created by us // Generic error flag for catching "failed" devices bool failed; // Enabled extensions PL_ARRAY(const char *) exts; // Command pools (one per queue family) PL_ARRAY(struct vk_cmdpool *) pools; // Pointers into `pools` struct vk_cmdpool *pool_graphics; // required struct vk_cmdpool *pool_compute; // optional struct vk_cmdpool *pool_transfer; // optional // Pending commands. 
These are shared for the entire mpvk_ctx to ensure // submission and callbacks are FIFO PL_ARRAY(struct vk_cmd *) cmds_pending; // submitted but not completed // Pending callbacks that still need to be drained before processing // callbacks for the next command (in case commands are recursively being // polled from another callback) const struct vk_callback *pending_callbacks; int num_pending_callbacks; // Instance-level function pointers PL_VK_FUN(CreateDevice); PL_VK_FUN(EnumerateDeviceExtensionProperties); PL_VK_FUN(GetDeviceProcAddr); PL_VK_FUN(GetInstanceProcAddr); PL_VK_FUN(GetPhysicalDeviceExternalBufferProperties); PL_VK_FUN(GetPhysicalDeviceExternalSemaphoreProperties); PL_VK_FUN(GetPhysicalDeviceFeatures2KHR); PL_VK_FUN(GetPhysicalDeviceFormatProperties); PL_VK_FUN(GetPhysicalDeviceFormatProperties2KHR); PL_VK_FUN(GetPhysicalDeviceImageFormatProperties2KHR); PL_VK_FUN(GetPhysicalDeviceMemoryProperties); PL_VK_FUN(GetPhysicalDeviceProperties); PL_VK_FUN(GetPhysicalDeviceProperties2); PL_VK_FUN(GetPhysicalDeviceQueueFamilyProperties); PL_VK_FUN(GetPhysicalDeviceSurfaceCapabilitiesKHR); PL_VK_FUN(GetPhysicalDeviceSurfaceFormatsKHR); PL_VK_FUN(GetPhysicalDeviceSurfacePresentModesKHR); PL_VK_FUN(GetPhysicalDeviceSurfaceSupportKHR); // Device-level function pointers PL_VK_FUN(AcquireNextImageKHR); PL_VK_FUN(AllocateCommandBuffers); PL_VK_FUN(AllocateDescriptorSets); PL_VK_FUN(AllocateMemory); PL_VK_FUN(BeginCommandBuffer); PL_VK_FUN(BindBufferMemory); PL_VK_FUN(BindImageMemory); PL_VK_FUN(CmdBeginDebugUtilsLabelEXT); PL_VK_FUN(CmdBeginRenderPass); PL_VK_FUN(CmdBindDescriptorSets); PL_VK_FUN(CmdBindIndexBuffer); PL_VK_FUN(CmdBindPipeline); PL_VK_FUN(CmdBindVertexBuffers); PL_VK_FUN(CmdBlitImage); PL_VK_FUN(CmdClearColorImage); PL_VK_FUN(CmdCopyBuffer); PL_VK_FUN(CmdCopyBufferToImage); PL_VK_FUN(CmdCopyImage); PL_VK_FUN(CmdCopyImageToBuffer); PL_VK_FUN(CmdDispatch); PL_VK_FUN(CmdDraw); PL_VK_FUN(CmdDrawIndexed); PL_VK_FUN(CmdEndDebugUtilsLabelEXT); PL_VK_FUN(CmdEndRenderPass); PL_VK_FUN(CmdPipelineBarrier); PL_VK_FUN(CmdPushConstants); PL_VK_FUN(CmdPushDescriptorSetKHR); PL_VK_FUN(CmdResetQueryPool); PL_VK_FUN(CmdSetEvent); PL_VK_FUN(CmdSetScissor); PL_VK_FUN(CmdSetViewport); PL_VK_FUN(CmdUpdateBuffer); PL_VK_FUN(CmdWaitEvents); PL_VK_FUN(CmdWriteTimestamp); PL_VK_FUN(CreateBuffer); PL_VK_FUN(CreateBufferView); PL_VK_FUN(CreateCommandPool); PL_VK_FUN(CreateComputePipelines); PL_VK_FUN(CreateDebugReportCallbackEXT); PL_VK_FUN(CreateDescriptorPool); PL_VK_FUN(CreateDescriptorSetLayout); PL_VK_FUN(CreateEvent); PL_VK_FUN(CreateFence); PL_VK_FUN(CreateFramebuffer); PL_VK_FUN(CreateGraphicsPipelines); PL_VK_FUN(CreateImage); PL_VK_FUN(CreateImageView); PL_VK_FUN(CreatePipelineCache); PL_VK_FUN(CreatePipelineLayout); PL_VK_FUN(CreateQueryPool); PL_VK_FUN(CreateRenderPass); PL_VK_FUN(CreateSampler); PL_VK_FUN(CreateSemaphore); PL_VK_FUN(CreateShaderModule); PL_VK_FUN(CreateSwapchainKHR); PL_VK_FUN(DestroyBuffer); PL_VK_FUN(DestroyBufferView); PL_VK_FUN(DestroyCommandPool); PL_VK_FUN(DestroyDebugReportCallbackEXT); PL_VK_FUN(DestroyDescriptorPool); PL_VK_FUN(DestroyDescriptorSetLayout); PL_VK_FUN(DestroyDevice); PL_VK_FUN(DestroyEvent); PL_VK_FUN(DestroyFence); PL_VK_FUN(DestroyFramebuffer); PL_VK_FUN(DestroyImage); PL_VK_FUN(DestroyImageView); PL_VK_FUN(DestroyInstance); PL_VK_FUN(DestroyPipeline); PL_VK_FUN(DestroyPipelineCache); PL_VK_FUN(DestroyPipelineLayout); PL_VK_FUN(DestroyQueryPool); PL_VK_FUN(DestroyRenderPass); PL_VK_FUN(DestroySampler); PL_VK_FUN(DestroySemaphore); 
PL_VK_FUN(DestroyShaderModule); PL_VK_FUN(DestroySwapchainKHR); PL_VK_FUN(EndCommandBuffer); PL_VK_FUN(FlushMappedMemoryRanges); PL_VK_FUN(FreeCommandBuffers); PL_VK_FUN(FreeMemory); PL_VK_FUN(GetBufferMemoryRequirements); PL_VK_FUN(GetDeviceQueue); PL_VK_FUN(GetImageDrmFormatModifierPropertiesEXT); PL_VK_FUN(GetImageMemoryRequirements2); PL_VK_FUN(GetImageSubresourceLayout); PL_VK_FUN(GetMemoryFdKHR); PL_VK_FUN(GetMemoryFdPropertiesKHR); PL_VK_FUN(GetMemoryHostPointerPropertiesEXT); PL_VK_FUN(GetPipelineCacheData); PL_VK_FUN(GetQueryPoolResults); PL_VK_FUN(GetSemaphoreFdKHR); PL_VK_FUN(GetSwapchainImagesKHR); PL_VK_FUN(InvalidateMappedMemoryRanges); PL_VK_FUN(MapMemory); PL_VK_FUN(QueuePresentKHR); PL_VK_FUN(QueueSubmit); PL_VK_FUN(ResetEvent); PL_VK_FUN(ResetFences); PL_VK_FUN(ResetQueryPoolEXT); PL_VK_FUN(SetDebugUtilsObjectNameEXT); PL_VK_FUN(SetHdrMetadataEXT); PL_VK_FUN(UpdateDescriptorSets); PL_VK_FUN(WaitForFences); PL_VK_FUN(WaitSemaphoresKHR); #ifdef PL_HAVE_WIN32 PL_VK_FUN(GetMemoryWin32HandleKHR); PL_VK_FUN(GetSemaphoreWin32HandleKHR); #endif }; libplacebo-v4.192.1/src/vulkan/context.c000066400000000000000000001621001417677245700201150ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "command.h" #include "utils.h" #include "gpu.h" #ifdef PL_HAVE_VK_PROC_ADDR VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr( VkInstance instance, const char* pName); #endif const struct pl_vk_inst_params pl_vk_inst_default_params = {0}; struct vk_fun { const char *name; size_t offset; bool device_level; }; struct vk_ext { const char *name; uint32_t core_ver; struct vk_fun *funs; }; #define PL_VK_INST_FUN(N) \ { .name = "vk" #N, \ .offset = offsetof(struct vk_ctx, N), \ } #define PL_VK_DEV_FUN(N) \ { .name = "vk" #N, \ .offset = offsetof(struct vk_ctx, N), \ .device_level = true, \ } // Table of optional vulkan instance extensions static const char *vk_instance_extensions[] = { VK_KHR_SURFACE_EXTENSION_NAME, VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME, VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, }; // List of mandatory instance-level function pointers, including functions // associated with mandatory instance extensions static const struct vk_fun vk_inst_funs[] = { PL_VK_INST_FUN(CreateDevice), PL_VK_INST_FUN(EnumerateDeviceExtensionProperties), PL_VK_INST_FUN(GetDeviceProcAddr), PL_VK_INST_FUN(GetPhysicalDeviceExternalBufferProperties), PL_VK_INST_FUN(GetPhysicalDeviceExternalSemaphoreProperties), PL_VK_INST_FUN(GetPhysicalDeviceFeatures2KHR), PL_VK_INST_FUN(GetPhysicalDeviceFormatProperties), PL_VK_INST_FUN(GetPhysicalDeviceFormatProperties2KHR), PL_VK_INST_FUN(GetPhysicalDeviceImageFormatProperties2KHR), PL_VK_INST_FUN(GetPhysicalDeviceMemoryProperties), PL_VK_INST_FUN(GetPhysicalDeviceProperties), PL_VK_INST_FUN(GetPhysicalDeviceProperties2), PL_VK_INST_FUN(GetPhysicalDeviceQueueFamilyProperties), // These are not actually mandatory, but they're universal enough that we // just load them unconditionally (in lieu of not having proper support for // loading arbitrary instance extensions). Their use is generally guarded // behind various VkSurfaceKHR values already being provided by the API // user (implying this extension is loaded). 
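    // (These VK_KHR_surface entry points correspond to the optional instance
    // extensions listed in `vk_instance_extensions` above, which are enabled
    // opportunistically during instance creation.)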
PL_VK_INST_FUN(GetPhysicalDeviceSurfaceCapabilitiesKHR), PL_VK_INST_FUN(GetPhysicalDeviceSurfaceFormatsKHR), PL_VK_INST_FUN(GetPhysicalDeviceSurfacePresentModesKHR), PL_VK_INST_FUN(GetPhysicalDeviceSurfaceSupportKHR), }; // Table of vulkan device extensions and functions they load, including // functions exported by dependent instance-level extensions static const struct vk_ext vk_device_extensions[] = { { .name = VK_KHR_SWAPCHAIN_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(AcquireNextImageKHR), PL_VK_DEV_FUN(CreateSwapchainKHR), PL_VK_DEV_FUN(DestroySwapchainKHR), PL_VK_DEV_FUN(GetSwapchainImagesKHR), PL_VK_DEV_FUN(QueuePresentKHR), {0} }, }, { .name = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(CmdPushDescriptorSetKHR), {0} }, }, { .name = VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(GetMemoryFdKHR), {0} }, }, { .name = VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(GetMemoryFdPropertiesKHR), {0} }, #ifdef PL_HAVE_WIN32 }, { .name = VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(GetMemoryWin32HandleKHR), {0} }, #endif }, { .name = VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(GetMemoryHostPointerPropertiesEXT), {0} }, }, { .name = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(GetSemaphoreFdKHR), {0} }, #ifdef PL_HAVE_WIN32 }, { .name = VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(GetSemaphoreWin32HandleKHR), {0} }, #endif }, { .name = VK_EXT_PCI_BUS_INFO_EXTENSION_NAME, }, { .name = VK_EXT_HDR_METADATA_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(SetHdrMetadataEXT), {0} }, }, { .name = VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, .core_ver = VK_API_VERSION_1_2, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(ResetQueryPoolEXT), {0} }, }, { .name = VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, .core_ver = VK_API_VERSION_1_2, }, { .name = VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(GetImageDrmFormatModifierPropertiesEXT), {0} }, }, { .name = VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, .core_ver = VK_API_VERSION_1_2, .funs = (struct vk_fun[]) { PL_VK_DEV_FUN(WaitSemaphoresKHR), {0} }, #ifdef VK_KHR_portability_subset }, { .name = VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, #endif }, }; // Make sure to keep this in sync with the above! 
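// (The static_assert further below checks this, modulo the +1 accounting for
// VK_KHR_swapchain, which is not part of the recommended extension list.)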
const char * const pl_vulkan_recommended_extensions[] = { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, #ifdef PL_HAVE_WIN32 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, #endif VK_EXT_PCI_BUS_INFO_EXTENSION_NAME, VK_EXT_HDR_METADATA_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, #ifdef VK_KHR_portability_subset VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, #endif }; const int pl_vulkan_num_recommended_extensions = PL_ARRAY_SIZE(pl_vulkan_recommended_extensions); // +1 because VK_KHR_swapchain is not automatically pulled in static_assert(PL_ARRAY_SIZE(pl_vulkan_recommended_extensions) + 1 == PL_ARRAY_SIZE(vk_device_extensions), "pl_vulkan_recommended_extensions out of sync with " "vk_device_extensions?"); // pNext chain of features we want enabled static const VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphores = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, .timelineSemaphore = true, }; static const VkPhysicalDeviceHostQueryResetFeatures host_query_reset = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES, .pNext = (void *) &timeline_semaphores, .hostQueryReset = true, }; const VkPhysicalDeviceFeatures2 pl_vulkan_recommended_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = (void *) &host_query_reset, .features = { .shaderImageGatherExtended = true, .shaderStorageImageReadWithoutFormat = true, .shaderStorageImageWriteWithoutFormat = true, // Needed for GPU-assisted validation, but not harmful to enable .fragmentStoresAndAtomics = true, .vertexPipelineStoresAndAtomics = true, .shaderInt64 = true, } }; // List of mandatory device-level functions // // Note: Also includes VK_EXT_debug_utils functions, even though they aren't // mandatory, simply because we load that extension in a special way. 
static const struct vk_fun vk_dev_funs[] = { PL_VK_DEV_FUN(AllocateCommandBuffers), PL_VK_DEV_FUN(AllocateDescriptorSets), PL_VK_DEV_FUN(AllocateMemory), PL_VK_DEV_FUN(BeginCommandBuffer), PL_VK_DEV_FUN(BindBufferMemory), PL_VK_DEV_FUN(BindImageMemory), PL_VK_DEV_FUN(CmdBeginDebugUtilsLabelEXT), PL_VK_DEV_FUN(CmdBeginRenderPass), PL_VK_DEV_FUN(CmdBindDescriptorSets), PL_VK_DEV_FUN(CmdBindIndexBuffer), PL_VK_DEV_FUN(CmdBindPipeline), PL_VK_DEV_FUN(CmdBindVertexBuffers), PL_VK_DEV_FUN(CmdBlitImage), PL_VK_DEV_FUN(CmdClearColorImage), PL_VK_DEV_FUN(CmdCopyBuffer), PL_VK_DEV_FUN(CmdCopyBufferToImage), PL_VK_DEV_FUN(CmdCopyImage), PL_VK_DEV_FUN(CmdCopyImageToBuffer), PL_VK_DEV_FUN(CmdDispatch), PL_VK_DEV_FUN(CmdDraw), PL_VK_DEV_FUN(CmdDrawIndexed), PL_VK_DEV_FUN(CmdEndDebugUtilsLabelEXT), PL_VK_DEV_FUN(CmdEndRenderPass), PL_VK_DEV_FUN(CmdPipelineBarrier), PL_VK_DEV_FUN(CmdPushConstants), PL_VK_DEV_FUN(CmdResetQueryPool), PL_VK_DEV_FUN(CmdSetEvent), PL_VK_DEV_FUN(CmdSetScissor), PL_VK_DEV_FUN(CmdSetViewport), PL_VK_DEV_FUN(CmdUpdateBuffer), PL_VK_DEV_FUN(CmdWaitEvents), PL_VK_DEV_FUN(CmdWriteTimestamp), PL_VK_DEV_FUN(CreateBuffer), PL_VK_DEV_FUN(CreateBufferView), PL_VK_DEV_FUN(CreateCommandPool), PL_VK_DEV_FUN(CreateComputePipelines), PL_VK_DEV_FUN(CreateDescriptorPool), PL_VK_DEV_FUN(CreateDescriptorSetLayout), PL_VK_DEV_FUN(CreateEvent), PL_VK_DEV_FUN(CreateFence), PL_VK_DEV_FUN(CreateFramebuffer), PL_VK_DEV_FUN(CreateGraphicsPipelines), PL_VK_DEV_FUN(CreateImage), PL_VK_DEV_FUN(CreateImageView), PL_VK_DEV_FUN(CreatePipelineCache), PL_VK_DEV_FUN(CreatePipelineLayout), PL_VK_DEV_FUN(CreateQueryPool), PL_VK_DEV_FUN(CreateRenderPass), PL_VK_DEV_FUN(CreateSampler), PL_VK_DEV_FUN(CreateSemaphore), PL_VK_DEV_FUN(CreateShaderModule), PL_VK_DEV_FUN(DestroyBuffer), PL_VK_DEV_FUN(DestroyBufferView), PL_VK_DEV_FUN(DestroyCommandPool), PL_VK_DEV_FUN(DestroyDescriptorPool), PL_VK_DEV_FUN(DestroyDescriptorSetLayout), PL_VK_DEV_FUN(DestroyDevice), PL_VK_DEV_FUN(DestroyEvent), PL_VK_DEV_FUN(DestroyFence), PL_VK_DEV_FUN(DestroyFramebuffer), PL_VK_DEV_FUN(DestroyImage), PL_VK_DEV_FUN(DestroyImageView), PL_VK_DEV_FUN(DestroyInstance), PL_VK_DEV_FUN(DestroyPipeline), PL_VK_DEV_FUN(DestroyPipelineCache), PL_VK_DEV_FUN(DestroyPipelineLayout), PL_VK_DEV_FUN(DestroyQueryPool), PL_VK_DEV_FUN(DestroyRenderPass), PL_VK_DEV_FUN(DestroySampler), PL_VK_DEV_FUN(DestroySemaphore), PL_VK_DEV_FUN(DestroyShaderModule), PL_VK_DEV_FUN(EndCommandBuffer), PL_VK_DEV_FUN(FlushMappedMemoryRanges), PL_VK_DEV_FUN(FreeCommandBuffers), PL_VK_DEV_FUN(FreeMemory), PL_VK_DEV_FUN(GetBufferMemoryRequirements), PL_VK_DEV_FUN(GetDeviceQueue), PL_VK_DEV_FUN(GetImageMemoryRequirements2), PL_VK_DEV_FUN(GetImageSubresourceLayout), PL_VK_DEV_FUN(GetPipelineCacheData), PL_VK_DEV_FUN(GetQueryPoolResults), PL_VK_DEV_FUN(InvalidateMappedMemoryRanges), PL_VK_DEV_FUN(MapMemory), PL_VK_DEV_FUN(QueueSubmit), PL_VK_DEV_FUN(ResetEvent), PL_VK_DEV_FUN(ResetFences), PL_VK_DEV_FUN(SetDebugUtilsObjectNameEXT), PL_VK_DEV_FUN(UpdateDescriptorSets), PL_VK_DEV_FUN(WaitForFences), }; static void load_vk_fun(struct vk_ctx *vk, const struct vk_fun *fun) { PFN_vkVoidFunction *pfn = (void *) ((uintptr_t) vk + (ptrdiff_t) fun->offset); if (fun->device_level) { *pfn = vk->GetDeviceProcAddr(vk->dev, fun->name); } else { *pfn = vk->GetInstanceProcAddr(vk->inst, fun->name); }; if (!*pfn) { // Some functions get their extension suffix stripped when promoted // to core. 
As a very simple work-around to this, try loading the // function a second time with the reserved suffixes stripped. static const char *ext_suffixes[] = { "KHR", "EXT" }; pl_str fun_name = pl_str0(fun->name); char buf[64]; for (int i = 0; i < PL_ARRAY_SIZE(ext_suffixes); i++) { if (!pl_str_eatend0(&fun_name, ext_suffixes[i])) continue; pl_assert(sizeof(buf) > fun_name.len); snprintf(buf, sizeof(buf), "%.*s", PL_STR_FMT(fun_name)); if (fun->device_level) { *pfn = vk->GetDeviceProcAddr(vk->dev, buf); } else { *pfn = vk->GetInstanceProcAddr(vk->inst, buf); } return; } } } // Private struct for pl_vk_inst struct priv { VkDebugUtilsMessengerEXT debug_utils_cb; }; void pl_vk_inst_destroy(pl_vk_inst *inst_ptr) { pl_vk_inst inst = *inst_ptr; if (!inst) return; struct priv *p = PL_PRIV(inst); if (p->debug_utils_cb) { PL_VK_LOAD_FUN(inst->instance, DestroyDebugUtilsMessengerEXT, inst->get_proc_addr); DestroyDebugUtilsMessengerEXT(inst->instance, p->debug_utils_cb, PL_VK_ALLOC); } PL_VK_LOAD_FUN(inst->instance, DestroyInstance, inst->get_proc_addr); DestroyInstance(inst->instance, PL_VK_ALLOC); pl_free_ptr((void **) inst_ptr); } static VkBool32 VKAPI_PTR vk_dbg_utils_cb(VkDebugUtilsMessageSeverityFlagBitsEXT sev, VkDebugUtilsMessageTypeFlagsEXT msgType, const VkDebugUtilsMessengerCallbackDataEXT *data, void *priv) { pl_log log = priv; // MSAN really doesn't like reading from the stack-allocated memory // allocated by the non-instrumented vulkan library, so just comment it out // when building with MSAN as a cheap hack-around. #ifndef MSAN // Ignore errors for messages that we consider false positives switch (data->messageIdNumber) { case 0x7cd0911d: // VUID-VkSwapchainCreateInfoKHR-imageExtent-01274 case 0x8928392f: // UNASSIGNED-BestPractices-NonSuccess-Result case 0xdc18ad6b: // UNASSIGNED-BestPractices-vkAllocateMemory-small-allocation case 0xb3d4346b: // UNASSIGNED-BestPractices-vkBindMemory-small-dedicated-allocation return false; // Work-around for validation layers bug case 0xc05b3a9d: // VUID-vkMapMemory-memory-00683 return false; case 0x5f379b89: // UNASSIGNED-BestPractices-Error-Result if (strstr(data->pMessage, "VK_ERROR_FORMAT_NOT_SUPPORTED")) return false; break; } #endif enum pl_log_level lev; switch (sev) { case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: lev = PL_LOG_ERR; break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: lev = PL_LOG_WARN; break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: lev = PL_LOG_DEBUG; break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: lev = PL_LOG_TRACE; break; default: lev = PL_LOG_INFO; break; } pl_msg(log, lev, "vk %s", data->pMessage); #ifndef MSAN for (int i = 0; i < data->queueLabelCount; i++) pl_msg(log, lev, " during %s", data->pQueueLabels[i].pLabelName); for (int i = 0; i < data->cmdBufLabelCount; i++) pl_msg(log, lev, " inside %s", data->pCmdBufLabels[i].pLabelName); for (int i = 0; i < data->objectCount; i++) { const VkDebugUtilsObjectNameInfoEXT *obj = &data->pObjects[i]; pl_msg(log, lev, " using %s: %s (0x%llx)", vk_obj_type(obj->objectType), obj->pObjectName ? obj->pObjectName : "anon", (unsigned long long) obj->objectHandle); } #endif // The return value of this function determines whether the call will // be explicitly aborted (to prevent GPU errors) or not. In this case, // we generally want this to be on for the validation errors, but nothing // else (e.g. 
performance warnings) bool is_error = (sev & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) && (msgType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT); if (is_error) pl_log_stack_trace(log, lev); return is_error; } static PFN_vkGetInstanceProcAddr get_proc_addr_fallback(pl_log log, PFN_vkGetInstanceProcAddr get_proc_addr) { if (get_proc_addr) return get_proc_addr; #ifdef PL_HAVE_VK_PROC_ADDR return vkGetInstanceProcAddr; #else pl_fatal(log, "No `vkGetInstanceProcAddr` function provided, and " "libplacebo built without linking against this function!"); return NULL; #endif } #define PRINTF_VER(ver) \ (int) VK_VERSION_MAJOR(ver), \ (int) VK_VERSION_MINOR(ver), \ (int) VK_VERSION_PATCH(ver) pl_vk_inst pl_vk_inst_create(pl_log log, const struct pl_vk_inst_params *params) { void *tmp = pl_tmp(NULL); params = PL_DEF(params, &pl_vk_inst_default_params); VkInstance inst = NULL; PL_ARRAY(const char *) exts = {0}; PFN_vkGetInstanceProcAddr get_addr; if (!(get_addr = get_proc_addr_fallback(log, params->get_proc_addr))) goto error; // Query instance version support uint32_t api_ver = VK_API_VERSION_1_0; PL_VK_LOAD_FUN(NULL, EnumerateInstanceVersion, get_addr); if (EnumerateInstanceVersion && EnumerateInstanceVersion(&api_ver) != VK_SUCCESS) goto error; pl_debug(log, "Available instance version: %d.%d.%d", PRINTF_VER(api_ver)); if (params->max_api_version) { api_ver = PL_MIN(api_ver, params->max_api_version); pl_info(log, "Restricting API version to %d.%d.%d... new version %d.%d.%d", PRINTF_VER(params->max_api_version), PRINTF_VER(api_ver)); } if (api_ver < VK_API_VERSION_1_1) { pl_fatal(log, "Instance API version %d.%d.%d is lower than the minimum " "required version of %d.%d.%d, cannot proceed!", PRINTF_VER(api_ver), PRINTF_VER(VK_API_VERSION_1_1)); goto error; } VkInstanceCreateInfo info = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &(VkApplicationInfo) { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .apiVersion = api_ver, }, }; #ifdef VK_EXT_validation_features // Try enabling as many validation features as possible. Ignored for // instances not supporting VK_EXT_validation_features. 
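    // (This struct is only chained into the instance create info further
    // below when `params->debug_extra` is set.)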
VkValidationFeatureEnableEXT validation_features[] = { VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, # if VK_EXT_VALIDATION_FEATURES_SPEC_VERSION >= 2 VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, # endif }; VkValidationFeaturesEXT vinfo = { .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, .pEnabledValidationFeatures = validation_features, .enabledValidationFeatureCount = PL_ARRAY_SIZE(validation_features), }; if (params->debug_extra) info.pNext = &vinfo; #else if (params->debug_extra) { pl_warn(log, "Enabled extra debugging but vulkan headers too old to " "support it, please update vulkan and recompile libplacebo!"); } #endif // Enumerate all supported layers PL_VK_LOAD_FUN(NULL, EnumerateInstanceLayerProperties, get_addr); uint32_t num_layers_avail = 0; EnumerateInstanceLayerProperties(&num_layers_avail, NULL); VkLayerProperties *layers_avail = pl_calloc_ptr(tmp, num_layers_avail, layers_avail); EnumerateInstanceLayerProperties(&num_layers_avail, layers_avail); pl_debug(log, "Available layers:"); for (int i = 0; i < num_layers_avail; i++) { pl_debug(log, " %s (v%d.%d.%d)", layers_avail[i].layerName, PRINTF_VER(layers_avail[i].specVersion)); } PL_ARRAY(const char *) layers = {0}; // Sorted by priority static const char *debug_layers[] = { "VK_LAYER_KHRONOS_validation", "VK_LAYER_LUNARG_standard_validation", }; // This layer has to be initialized first, otherwise all sorts of weirdness // happens (random segfaults, yum) bool debug = params->debug; if (debug) { for (int i = 0; i < PL_ARRAY_SIZE(debug_layers); i++) { for (int n = 0; n < num_layers_avail; n++) { if (strcmp(debug_layers[i], layers_avail[n].layerName) != 0) continue; pl_info(log, "Enabling debug meta layer: %s", debug_layers[i]); PL_ARRAY_APPEND(tmp, layers, debug_layers[i]); goto debug_layers_done; } } // No layer found.. pl_warn(log, "API debugging requested but no debug meta layers present... 
ignoring"); debug = false; } debug_layers_done: ; for (int i = 0; i < params->num_layers; i++) PL_ARRAY_APPEND(tmp, layers, params->layers[i]); for (int i = 0; i < params->num_opt_layers; i++) { const char *layer = params->opt_layers[i]; for (int n = 0; n < num_layers_avail; n++) { if (strcmp(layer, layers_avail[n].layerName) == 0) { PL_ARRAY_APPEND(tmp, layers, layer); break; } } } // Enumerate all supported extensions PL_VK_LOAD_FUN(NULL, EnumerateInstanceExtensionProperties, get_addr); uint32_t num_exts_avail = 0; EnumerateInstanceExtensionProperties(NULL, &num_exts_avail, NULL); VkExtensionProperties *exts_avail = pl_calloc_ptr(tmp, num_exts_avail, exts_avail); EnumerateInstanceExtensionProperties(NULL, &num_exts_avail, exts_avail); struct { VkExtensionProperties *exts; uint32_t num_exts; } *layer_exts = pl_calloc_ptr(tmp, num_layers_avail, layer_exts); // Enumerate extensions from layers for (int i = 0; i < num_layers_avail; i++) { VkExtensionProperties **lexts = &layer_exts[i].exts; uint32_t *num = &layer_exts[i].num_exts; EnumerateInstanceExtensionProperties(layers_avail[i].layerName, num, NULL); *lexts = pl_calloc_ptr(tmp, *num, *lexts); EnumerateInstanceExtensionProperties(layers_avail[i].layerName, num, *lexts); // Replace all extensions that are already available globally by {0} for (int j = 0; j < *num; j++) { for (int k = 0; k < num_exts_avail; k++) { if (strcmp((*lexts)[j].extensionName, exts_avail[k].extensionName) == 0) (*lexts)[j] = (VkExtensionProperties) {0}; } } } pl_debug(log, "Available instance extensions:"); for (int i = 0; i < num_exts_avail; i++) pl_debug(log, " %s", exts_avail[i].extensionName); for (int i = 0; i < num_layers_avail; i++) { for (int j = 0; j < layer_exts[i].num_exts; j++) { if (!layer_exts[i].exts[j].extensionName[0]) continue; pl_debug(log, " %s (via %s)", layer_exts[i].exts[j].extensionName, layers_avail[i].layerName); } } // Add mandatory extensions PL_ARRAY_APPEND(tmp, exts, VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); // Add optional extensions for (int i = 0; i < PL_ARRAY_SIZE(vk_instance_extensions); i++) { const char *ext = vk_instance_extensions[i]; for (int n = 0; n < num_exts_avail; n++) { if (strcmp(ext, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); break; } } } // Add extra user extensions for (int i = 0; i < params->num_extensions; i++) { const char *ext = params->extensions[i]; PL_ARRAY_APPEND(tmp, exts, ext); // Enable any additional layers that are required for this extension for (int n = 0; n < num_layers_avail; n++) { for (int j = 0; j < layer_exts[n].num_exts; j++) { if (!layer_exts[n].exts[j].extensionName[0]) continue; if (strcmp(ext, layer_exts[n].exts[j].extensionName) == 0) { PL_ARRAY_APPEND(tmp, layers, layers_avail[n].layerName); goto next_user_ext; } } } next_user_ext: ; } // Add extra optional user extensions for (int i = 0; i < params->num_opt_extensions; i++) { const char *ext = params->opt_extensions[i]; for (int n = 0; n < num_exts_avail; n++) { if (strcmp(ext, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); goto next_opt_user_ext; } } for (int n = 0; n < num_layers_avail; n++) { for (int j = 0; j < layer_exts[n].num_exts; j++) { if (!layer_exts[n].exts[j].extensionName[0]) continue; if (strcmp(ext, layer_exts[n].exts[j].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); PL_ARRAY_APPEND(tmp, layers, layers_avail[n].layerName); goto next_opt_user_ext; } } } next_opt_user_ext: ; } // If debugging is enabled, load the necessary debug utils extension if 
(debug) { for (int n = 0; n < num_exts_avail; n++) { const char * const debug_ext = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; if (strcmp(debug_ext, exts_avail[n].extensionName) != 0) continue; pl_info(log, "Enabling debug report extension: %s", debug_ext); PL_ARRAY_APPEND(tmp, exts, debug_ext); goto debug_ext_done; } // No extension found pl_warn(log, "API debug layers enabled but no debug report extension " "found... ignoring. Debug messages may be spilling to " "stdout/stderr!"); debug = false; } debug_ext_done: ; info.ppEnabledExtensionNames = exts.elem; info.enabledExtensionCount = exts.num; info.ppEnabledLayerNames = layers.elem; info.enabledLayerCount = layers.num; pl_info(log, "Creating vulkan instance%s", exts.num ? " with extensions:" : ""); for (int i = 0; i < exts.num; i++) pl_info(log, " %s", exts.elem[i]); if (layers.num) { pl_info(log, " and layers:"); for (int i = 0; i < layers.num; i++) pl_info(log, " %s", layers.elem[i]); } PL_VK_LOAD_FUN(NULL, CreateInstance, get_addr); VkResult res = CreateInstance(&info, PL_VK_ALLOC, &inst); if (res != VK_SUCCESS) { pl_fatal(log, "Failed creating instance: %s", vk_res_str(res)); goto error; } struct pl_vk_inst *pl_vk = pl_zalloc_obj(NULL, pl_vk, struct priv); struct priv *p = PL_PRIV(pl_vk); *pl_vk = (struct pl_vk_inst) { .instance = inst, .api_version = api_ver, .get_proc_addr = get_addr, .extensions = pl_steal(pl_vk, exts.elem), .num_extensions = exts.num, .layers = pl_steal(pl_vk, layers.elem), .num_layers = layers.num, }; // Set up a debug callback to catch validation messages if (debug) { VkDebugUtilsMessengerCreateInfoEXT dinfo = { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, .pfnUserCallback = vk_dbg_utils_cb, .pUserData = (void *) log, }; PL_VK_LOAD_FUN(inst, CreateDebugUtilsMessengerEXT, get_addr); CreateDebugUtilsMessengerEXT(inst, &dinfo, PL_VK_ALLOC, &p->debug_utils_cb); } pl_free(tmp); return pl_vk; error: pl_fatal(log, "Failed initializing vulkan instance"); if (inst) { PL_VK_LOAD_FUN(inst, DestroyInstance, get_addr); DestroyInstance(inst, PL_VK_ALLOC); } pl_free(tmp); return NULL; } const struct pl_vulkan_params pl_vulkan_default_params = { PL_VULKAN_DEFAULTS }; void pl_vulkan_destroy(pl_vulkan *pl_vk) { if (!*pl_vk) return; struct vk_ctx *vk = PL_PRIV(*pl_vk); pl_gpu_destroy((*pl_vk)->gpu); vk_malloc_destroy(&vk->ma); if (vk->dev) { PL_DEBUG(vk, "Waiting for remaining commands..."); vk_wait_idle(vk); pl_assert(vk->cmds_pending.num == 0); for (int i = 0; i < vk->pools.num; i++) vk_cmdpool_destroy(vk, vk->pools.elem[i]); if (!vk->imported) vk->DestroyDevice(vk->dev, PL_VK_ALLOC); } pl_vk_inst_destroy(&vk->internal_instance); pl_mutex_destroy(&vk->lock); pl_free_ptr((void **) pl_vk); } static bool supports_surf(pl_log log, VkInstance inst, PFN_vkGetInstanceProcAddr get_addr, VkPhysicalDevice physd, VkSurfaceKHR surf) { // Hack for the VK macro's logging to work struct { pl_log log; } *vk = (void *) &log; PL_VK_LOAD_FUN(inst, GetPhysicalDeviceQueueFamilyProperties, get_addr); PL_VK_LOAD_FUN(inst, GetPhysicalDeviceSurfaceSupportKHR, get_addr); uint32_t qfnum = 0; GetPhysicalDeviceQueueFamilyProperties(physd, &qfnum, NULL); for (int i = 0; i < 
qfnum; i++) { VkBool32 sup = false; VK(GetPhysicalDeviceSurfaceSupportKHR(physd, i, surf, &sup)); if (sup) return true; } error: return false; } VkPhysicalDevice pl_vulkan_choose_device(pl_log log, const struct pl_vulkan_device_params *params) { // Hack for the VK macro's logging to work struct { pl_log log; } *vk = (void *) &log; PL_INFO(vk, "Probing for vulkan devices:"); pl_assert(params->instance); VkInstance inst = params->instance; VkPhysicalDevice dev = VK_NULL_HANDLE; PFN_vkGetInstanceProcAddr get_addr; if (!(get_addr = get_proc_addr_fallback(log, params->get_proc_addr))) return NULL; PL_VK_LOAD_FUN(inst, EnumeratePhysicalDevices, get_addr); PL_VK_LOAD_FUN(inst, GetPhysicalDeviceProperties2, get_addr); pl_assert(GetPhysicalDeviceProperties2); VkPhysicalDevice *devices = NULL; uint32_t num = 0; VK(EnumeratePhysicalDevices(inst, &num, NULL)); devices = pl_calloc_ptr(NULL, num, devices); VK(EnumeratePhysicalDevices(inst, &num, devices)); static const struct { const char *name; int priority; } types[] = { [VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU] = {"discrete", 5}, [VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU] = {"integrated", 4}, [VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU] = {"virtual", 3}, [VK_PHYSICAL_DEVICE_TYPE_CPU] = {"software", 2}, [VK_PHYSICAL_DEVICE_TYPE_OTHER] = {"other", 1}, }; static const uint8_t nil[VK_UUID_SIZE] = {0}; bool uuid_set = memcmp(params->device_uuid, nil, VK_UUID_SIZE) != 0; int best = -1; for (int i = 0; i < num; i++) { VkPhysicalDeviceIDPropertiesKHR id_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR, }; VkPhysicalDeviceProperties2 prop = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, .pNext = &id_props, }; GetPhysicalDeviceProperties2(devices[i], &prop); VkPhysicalDeviceType t = prop.properties.deviceType; const char *dtype = t < PL_ARRAY_SIZE(types) ? types[t].name : "unknown?"; PL_INFO(vk, " GPU %d: %s (%s)", i, prop.properties.deviceName, dtype); PL_INFO(vk, " uuid: %s", PRINT_UUID(id_props.deviceUUID)); if (params->surface) { if (!supports_surf(log, inst, get_addr, devices[i], params->surface)) { PL_DEBUG(vk, " -> excluding due to lack of surface support"); continue; } } if (uuid_set) { if (memcmp(id_props.deviceUUID, params->device_uuid, VK_UUID_SIZE) == 0) { dev = devices[i]; continue; } else { PL_DEBUG(vk, " -> excluding due to UUID mismatch"); continue; } } else if (params->device_name && params->device_name[0] != '\0') { if (strcmp(params->device_name, prop.properties.deviceName) == 0) { dev = devices[i]; continue; } else { PL_DEBUG(vk, " -> excluding due to name mismatch"); continue; } } if (!params->allow_software && t == VK_PHYSICAL_DEVICE_TYPE_CPU) { PL_DEBUG(vk, " -> excluding due to params->allow_software"); continue; } if (prop.properties.apiVersion < VK_API_VERSION_1_1) { PL_DEBUG(vk, " -> excluding due to too low API version"); continue; } int priority = t < PL_ARRAY_SIZE(types) ? types[t].priority : 0; if (priority > best) { dev = devices[i]; best = priority; } } error: pl_free(devices); return dev; } // Find the most specialized queue supporting a combination of flags. In cases // where there are multiple queue families at the same specialization level, // this finds the one with the most queues. Returns -1 if no queue was found. static int find_qf(VkQueueFamilyProperties *qfs, int qfnum, VkQueueFlags flags) { int idx = -1; for (int i = 0; i < qfnum; i++) { if (!(qfs[i].queueFlags & flags)) continue; // QF is more specialized.
Since we don't care about other bits like // SPARSE_BIT, mask the ones we're interested in const VkQueueFlags mask = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_COMPUTE_BIT; if (idx < 0 || (qfs[i].queueFlags & mask) < (qfs[idx].queueFlags & mask)) idx = i; // QF has more queues (at the same specialization level) if (qfs[i].queueFlags == qfs[idx].queueFlags && qfs[i].queueCount > qfs[idx].queueCount) idx = i; } return idx; } typedef PL_ARRAY(VkDeviceQueueCreateInfo) qinfo_arr_t; static void add_qinfo(void *alloc, qinfo_arr_t *qinfos, VkQueueFamilyProperties *qfs, int idx, int qcount) { if (idx < 0) return; // Check to see if we've already added this queue family for (int i = 0; i < qinfos->num; i++) { if (qinfos->elem[i].queueFamilyIndex == idx) return; } if (!qcount) qcount = qfs[idx].queueCount; float *priorities = pl_calloc_ptr(alloc, qcount, priorities); VkDeviceQueueCreateInfo qinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .queueFamilyIndex = idx, .queueCount = PL_MIN(qcount, qfs[idx].queueCount), .pQueuePriorities = priorities, }; PL_ARRAY_APPEND(alloc, *qinfos, qinfo); } static bool device_init(struct vk_ctx *vk, const struct pl_vulkan_params *params) { pl_assert(vk->physd); void *tmp = pl_tmp(NULL); // Enumerate the queue families and find suitable families for each task uint32_t qfnum = 0; vk->GetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL); VkQueueFamilyProperties *qfs = pl_calloc_ptr(tmp, qfnum, qfs); vk->GetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs); PL_DEBUG(vk, "Queue families supported by device:"); for (int i = 0; i < qfnum; i++) { PL_DEBUG(vk, " %d: flags 0x%x num %d", i, (unsigned) qfs[i].queueFlags, (int) qfs[i].queueCount); } int idx_gfx = -1, idx_comp = -1, idx_tf = -1; idx_gfx = find_qf(qfs, qfnum, VK_QUEUE_GRAPHICS_BIT); if (params->async_compute) idx_comp = find_qf(qfs, qfnum, VK_QUEUE_COMPUTE_BIT); if (params->async_transfer) idx_tf = find_qf(qfs, qfnum, VK_QUEUE_TRANSFER_BIT); // Vulkan requires at least one GRAPHICS queue, so if this fails something // is horribly wrong. pl_assert(idx_gfx >= 0); PL_DEBUG(vk, "Using graphics queue %d", idx_gfx); // If needed, ensure we can actually present to the surface using this queue if (params->surface) { VkBool32 sup = false; VK(vk->GetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx_gfx, params->surface, &sup)); if (!sup) { PL_FATAL(vk, "Queue family does not support surface presentation!"); goto error; } } // Fall back to supporting compute shaders via the graphics pool for // devices which support compute shaders but not async compute.
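// (This fallback only decides which queue family compute work is submitted
// to; whether compute shaders are advertised at all is determined later, in
// gpu.c, based on whether `vk->pool_compute` ends up being set.)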
if (idx_comp < 0 && qfs[idx_gfx].queueFlags & VK_QUEUE_COMPUTE_BIT) idx_comp = idx_gfx; if (idx_tf >= 0 && idx_tf != idx_gfx) PL_INFO(vk, "Using async transfer (queue %d)", idx_tf); if (idx_comp >= 0 && idx_comp != idx_gfx) PL_INFO(vk, "Using async compute (queue %d)", idx_comp); // Now that we know which QFs we want, we can create the logical device qinfo_arr_t qinfos = {0}; add_qinfo(tmp, &qinfos, qfs, idx_gfx, params->queue_count); add_qinfo(tmp, &qinfos, qfs, idx_comp, params->queue_count); add_qinfo(tmp, &qinfos, qfs, idx_tf, params->queue_count); // Enumerate all supported extensions uint32_t num_exts_avail = 0; VK(vk->EnumerateDeviceExtensionProperties(vk->physd, NULL, &num_exts_avail, NULL)); VkExtensionProperties *exts_avail = pl_calloc_ptr(tmp, num_exts_avail, exts_avail); VK(vk->EnumerateDeviceExtensionProperties(vk->physd, NULL, &num_exts_avail, exts_avail)); PL_DEBUG(vk, "Available device extensions:"); for (int i = 0; i < num_exts_avail; i++) PL_DEBUG(vk, " %s", exts_avail[i].extensionName); // Add all extensions we need if (params->surface) PL_ARRAY_APPEND(vk->alloc, vk->exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME); // Keep track of all optional function pointers associated with extensions PL_ARRAY(const struct vk_fun *) ext_funs = {0}; // Add all optional device-level extensions for (int i = 0; i < PL_ARRAY_SIZE(vk_device_extensions); i++) { const struct vk_ext *ext = &vk_device_extensions[i]; if (ext->core_ver && vk->api_ver >= ext->core_ver) { // Extension is already implicitly enabled by the API version for (const struct vk_fun *f = ext->funs; f && f->name; f++) PL_ARRAY_APPEND(tmp, ext_funs, f); continue; } for (int n = 0; n < num_exts_avail; n++) { if (strcmp(ext->name, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(vk->alloc, vk->exts, ext->name); for (const struct vk_fun *f = ext->funs; f && f->name; f++) PL_ARRAY_APPEND(tmp, ext_funs, f); break; } } } // Add extra user extensions for (int i = 0; i < params->num_extensions; i++) PL_ARRAY_APPEND(vk->alloc, vk->exts, params->extensions[i]); // Add optional extra user extensions for (int i = 0; i < params->num_opt_extensions; i++) { const char *ext = params->opt_extensions[i]; for (int n = 0; n < num_exts_avail; n++) { if (strcmp(ext, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(vk->alloc, vk->exts, ext); break; } } } // Query all supported device features by constructing a pNext chain // starting with the features we care about and ending with whatever // features were requested by the user vk->features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; for (const VkBaseInStructure *in = pl_vulkan_recommended_features.pNext; in; in = in->pNext) vk_link_struct(&vk->features, vk_struct_memdup(vk->alloc, in)); for (const VkBaseInStructure *in = (const VkBaseInStructure *) params->features; in; in = in->pNext) { if (vk_find_struct(&vk->features, in->sType)) continue; // skip structs already present void *copy = vk_struct_memdup(vk->alloc, in); if (!copy) { PL_ERR(vk, "Unknown struct type %"PRIu64"?", (uint64_t) in->sType); continue; } vk_link_struct(&vk->features, copy); } vk->GetPhysicalDeviceFeatures2KHR(vk->physd, &vk->features); const VkPhysicalDeviceTimelineSemaphoreFeatures *timeline_sem; timeline_sem = vk_find_struct(&vk->features, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES); if (!timeline_sem || !timeline_sem->timelineSemaphore) { PL_ERR(vk, "Selected vulkan device does not support timeline semaphores!"); goto error; } // Go through the features chain a second time and mask
every option // that wasn't whitelisted by either libplacebo or the user for (VkBaseOutStructure *chain = (VkBaseOutStructure *) &vk->features; chain; chain = chain->pNext) { const VkBaseInStructure *in_a, *in_b; in_a = vk_find_struct(&pl_vulkan_recommended_features, chain->sType); in_b = vk_find_struct(params->features, chain->sType); in_a = PL_DEF(in_a, in_b); in_b = PL_DEF(in_b, in_a); pl_assert(in_a && in_b); VkBool32 *req = (VkBool32 *) &chain[1]; const VkBool32 *wl_a = (const VkBool32 *) &in_a[1]; const VkBool32 *wl_b = (const VkBool32 *) &in_b[1]; size_t size = vk_struct_size(chain->sType) - sizeof(chain[0]); for (int i = 0; i < size / sizeof(VkBool32); i++) req[i] &= wl_a[i] || wl_b[i]; } VkDeviceCreateInfo dinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = &vk->features, .pQueueCreateInfos = qinfos.elem, .queueCreateInfoCount = qinfos.num, .ppEnabledExtensionNames = vk->exts.elem, .enabledExtensionCount = vk->exts.num, }; PL_INFO(vk, "Creating vulkan device%s", vk->exts.num ? " with extensions:" : ""); for (int i = 0; i < vk->exts.num; i++) PL_INFO(vk, " %s", vk->exts.elem[i]); VK(vk->CreateDevice(vk->physd, &dinfo, PL_VK_ALLOC, &vk->dev)); // Load all mandatory device-level functions for (int i = 0; i < PL_ARRAY_SIZE(vk_dev_funs); i++) load_vk_fun(vk, &vk_dev_funs[i]); // Load all of the optional functions from the extensions we enabled for (int i = 0; i < ext_funs.num; i++) load_vk_fun(vk, ext_funs.elem[i]); // Create the command pools for (int i = 0; i < qinfos.num; i++) { int qf = qinfos.elem[i].queueFamilyIndex; struct vk_cmdpool *pool = vk_cmdpool_create(vk, qinfos.elem[i], qfs[qf]); if (!pool) goto error; PL_ARRAY_APPEND(vk->alloc, vk->pools, pool); // Update the pool_* pointers based on the corresponding index const char *qf_name = NULL; if (qf == idx_tf) { vk->pool_transfer = pool; qf_name = "transfer"; } if (qf == idx_comp) { vk->pool_compute = pool; qf_name = "compute"; } if (qf == idx_gfx) { vk->pool_graphics = pool; qf_name = "graphics"; } for (int n = 0; n < pool->num_queues; n++) PL_VK_NAME(QUEUE, pool->queues[n], qf_name); } pl_free(tmp); return true; error: PL_FATAL(vk, "Failed creating logical device!"); pl_free(tmp); vk->failed = true; return false; } pl_vulkan pl_vulkan_create(pl_log log, const struct pl_vulkan_params *params) { params = PL_DEF(params, &pl_vulkan_default_params); struct pl_vulkan *pl_vk = pl_zalloc_obj(NULL, pl_vk, struct vk_ctx); struct vk_ctx *vk = PL_PRIV(pl_vk); *vk = (struct vk_ctx) { .vulkan = pl_vk, .alloc = pl_vk, .log = log, .inst = params->instance, .GetInstanceProcAddr = get_proc_addr_fallback(log, params->get_proc_addr), }; pl_mutex_init_type(&vk->lock, PL_MUTEX_RECURSIVE); if (!vk->GetInstanceProcAddr) goto error; if (!vk->inst) { pl_assert(!params->surface); pl_assert(!params->device); PL_DEBUG(vk, "No VkInstance provided, creating one..."); // Mirror the instance params here to set `get_proc_addr` correctly struct pl_vk_inst_params iparams; iparams = *PL_DEF(params->instance_params, &pl_vk_inst_default_params); iparams.get_proc_addr = params->get_proc_addr; vk->internal_instance = pl_vk_inst_create(log, &iparams); if (!vk->internal_instance) goto error; vk->inst = vk->internal_instance->instance; } // Directly load all mandatory instance-level function pointers, since // these will be required for all further device creation logic for (int i = 0; i < PL_ARRAY_SIZE(vk_inst_funs); i++) load_vk_fun(vk, &vk_inst_funs[i]); // Choose the physical device if (params->device) { PL_DEBUG(vk, "Using specified 
VkPhysicalDevice"); vk->physd = params->device; } else { struct pl_vulkan_device_params dparams = { .instance = vk->inst, .get_proc_addr = params->get_proc_addr, .surface = params->surface, .device_name = params->device_name, .allow_software = params->allow_software, }; memcpy(dparams.device_uuid, params->device_uuid, VK_UUID_SIZE); vk->physd = pl_vulkan_choose_device(log, &dparams); if (!vk->physd) { PL_FATAL(vk, "Found no suitable device, giving up."); goto error; } } VkPhysicalDeviceIDPropertiesKHR id_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR, }; VkPhysicalDeviceProperties2KHR prop = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, .pNext = &id_props, }; vk->GetPhysicalDeviceProperties2(vk->physd, &prop); vk->limits = prop.properties.limits; PL_INFO(vk, "Vulkan device properties:"); PL_INFO(vk, " Device Name: %s", prop.properties.deviceName); PL_INFO(vk, " Device ID: %x:%x", (unsigned) prop.properties.vendorID, (unsigned) prop.properties.deviceID); PL_INFO(vk, " Device UUID: %s", PRINT_UUID(id_props.deviceUUID)); PL_INFO(vk, " Driver version: %d", (int) prop.properties.driverVersion); PL_INFO(vk, " API version: %d.%d.%d", PRINTF_VER(prop.properties.apiVersion)); // Needed by device_init vk->api_ver = prop.properties.apiVersion; if (params->max_api_version) { vk->api_ver = PL_MIN(vk->api_ver, params->max_api_version); PL_INFO(vk, "Restricting API version to %d.%d.%d... new version %d.%d.%d", PRINTF_VER(params->max_api_version), PRINTF_VER(vk->api_ver)); } if (vk->api_ver < VK_API_VERSION_1_1) { PL_FATAL(vk, "Device API version %d.%d.%d is lower than the minimum " "required version of %d.%d.%d, cannot proceed!", PRINTF_VER(vk->api_ver), PRINTF_VER(VK_API_VERSION_1_1)); goto error; } // Finally, initialize the logical device and the rest of the vk_ctx if (!device_init(vk, params)) goto error; vk->ma = vk_malloc_create(vk); if (!vk->ma) goto error; pl_vk->gpu = pl_gpu_create_vk(vk); if (!pl_vk->gpu) goto error; // Blacklist / restrict features if (params->max_glsl_version) { struct pl_glsl_version *glsl = (struct pl_glsl_version *) &pl_vk->gpu->glsl; glsl->version = PL_MIN(glsl->version, params->max_glsl_version); glsl->version = PL_MAX(glsl->version, 140); // required for GL_KHR_vulkan_glsl PL_INFO(vk, "Restricting GLSL version to %d... 
new version is %d", params->max_glsl_version, glsl->version); } // Expose the resulting vulkan objects pl_vk->instance = vk->inst; pl_vk->phys_device = vk->physd; pl_vk->device = vk->dev; pl_vk->api_version = vk->api_ver; pl_vk->extensions = vk->exts.elem; pl_vk->num_extensions = vk->exts.num; pl_vk->features = &vk->features; pl_vk->num_queues = vk->pools.num; pl_vk->queues = pl_calloc_ptr(pl_vk, vk->pools.num, pl_vk->queues); for (int i = 0; i < vk->pools.num; i++) { struct pl_vulkan_queue *queues = (struct pl_vulkan_queue *) pl_vk->queues; queues[i] = (struct pl_vulkan_queue) { .index = vk->pools.elem[i]->qf, .count = vk->pools.elem[i]->num_queues, }; if (vk->pools.elem[i] == vk->pool_graphics) pl_vk->queue_graphics = queues[i]; if (vk->pools.elem[i] == vk->pool_compute && vk->pool_compute != vk->pool_graphics) pl_vk->queue_compute = queues[i]; if (vk->pools.elem[i] == vk->pool_compute) pl_vk->queue_compute = queues[i]; } return pl_vk; error: PL_FATAL(vk, "Failed initializing vulkan device"); pl_vulkan_destroy((pl_vulkan *) &pl_vk); return NULL; } pl_vulkan pl_vulkan_import(pl_log log, const struct pl_vulkan_import_params *params) { void *tmp = pl_tmp(NULL); struct pl_vulkan *pl_vk = pl_zalloc_obj(NULL, pl_vk, struct vk_ctx); struct vk_ctx *vk = PL_PRIV(pl_vk); *vk = (struct vk_ctx) { .vulkan = pl_vk, .alloc = pl_vk, .log = log, .imported = true, .inst = params->instance, .physd = params->phys_device, .dev = params->device, .GetInstanceProcAddr = get_proc_addr_fallback(log, params->get_proc_addr), }; pl_mutex_init_type(&vk->lock, PL_MUTEX_RECURSIVE); if (!vk->GetInstanceProcAddr) goto error; for (int i = 0; i < PL_ARRAY_SIZE(vk_inst_funs); i++) load_vk_fun(vk, &vk_inst_funs[i]); VkPhysicalDeviceIDPropertiesKHR id_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR, }; VkPhysicalDeviceProperties2KHR prop = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, .pNext = &id_props, }; pl_assert(vk->GetPhysicalDeviceProperties2); vk->GetPhysicalDeviceProperties2(vk->physd, &prop); vk->limits = prop.properties.limits; PL_INFO(vk, "Imported vulkan device properties:"); PL_INFO(vk, " Device Name: %s", prop.properties.deviceName); PL_INFO(vk, " Device ID: %x:%x", (unsigned) prop.properties.vendorID, (unsigned) prop.properties.deviceID); PL_INFO(vk, " Device UUID: %s", PRINT_UUID(id_props.deviceUUID)); PL_INFO(vk, " Driver version: %d", (int) prop.properties.driverVersion); PL_INFO(vk, " API version: %d.%d.%d", PRINTF_VER(prop.properties.apiVersion)); vk->api_ver = prop.properties.apiVersion; if (params->max_api_version) { vk->api_ver = PL_MIN(vk->api_ver, params->max_api_version); PL_INFO(vk, "Restricting API version to %d.%d.%d... 
new version %d.%d.%d", PRINTF_VER(params->max_api_version), PRINTF_VER(vk->api_ver)); } if (vk->api_ver < VK_API_VERSION_1_1) { PL_FATAL(vk, "Device API version %d.%d.%d is lower than the minimum " "required version of %d.%d.%d, cannot proceed!", PRINTF_VER(vk->api_ver), PRINTF_VER(VK_API_VERSION_1_1)); goto error; } VkPhysicalDeviceFeatures2 *features; features = vk_chain_memdup(vk->alloc, params->features); if (features) { // Go through and replace all meta-features structs by their individual // extension variants, since that's what we check for in our code const VkPhysicalDeviceVulkan12Features *vk12 = vk_find_struct(features, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES); if (vk12 && vk12->hostQueryReset) { const VkPhysicalDeviceHostQueryResetFeatures hqr = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES, .hostQueryReset = true, }; vk_link_struct(features, vk_struct_memdup(vk->alloc, &hqr)); } if (vk12 && vk12->timelineSemaphore) { const VkPhysicalDeviceTimelineSemaphoreFeatures ts = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, .timelineSemaphore = true, }; vk_link_struct(features, vk_struct_memdup(vk->alloc, &ts)); } vk->features = *features; } const VkPhysicalDeviceTimelineSemaphoreFeatures *timeline_sem; timeline_sem = vk_find_struct(&vk->features, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES); if (!timeline_sem || !timeline_sem->timelineSemaphore) { PL_ERR(vk, "Imported Vulkan device does not support timeline " "semaphores. Please enable this device feature."); goto error; } // Load all mandatory device-level functions for (int i = 0; i < PL_ARRAY_SIZE(vk_dev_funs); i++) load_vk_fun(vk, &vk_dev_funs[i]); // Load all of the optional functions from the extensions enabled for (int i = 0; i < PL_ARRAY_SIZE(vk_device_extensions); i++) { const struct vk_ext *ext = &vk_device_extensions[i]; for (int n = 0; n < params->num_extensions; n++) { if (strcmp(ext->name, params->extensions[n]) == 0 || (ext->core_ver && ext->core_ver >= vk->api_ver)) { // Extension is available, directly load it for (const struct vk_fun *f = ext->funs; f && f->name; f++) load_vk_fun(vk, f); break; } } } uint32_t qfnum = 0; vk->GetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL); VkQueueFamilyProperties *qfs = pl_calloc_ptr(tmp, qfnum, qfs); vk->GetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs); // Create the command pools for each unique qf that exists struct { const struct pl_vulkan_queue *info; struct vk_cmdpool **pool; } qinfos[] = { { .info = ¶ms->queue_graphics, .pool = &vk->pool_graphics, }, { .info = ¶ms->queue_compute, .pool = &vk->pool_compute, }, { .info = ¶ms->queue_transfer, .pool = &vk->pool_transfer, } }; for (int i = 0; i < PL_ARRAY_SIZE(qinfos); i++) { int qf = qinfos[i].info->index; struct vk_cmdpool **pool = qinfos[i].pool; if (!qinfos[i].info->count) continue; // See if we already created a pool for this queue family for (int j = 0; j < i; j++) { if (qinfos[j].info->count && qinfos[j].info->index == qf) { *pool = *qinfos[j].pool; goto next_qf; } } struct VkDeviceQueueCreateInfo qinfo = { .queueFamilyIndex = qf, .queueCount = qinfos[i].info->count, }; *pool = vk_cmdpool_create(vk, qinfo, qfs[qf]); if (!*pool) goto error; PL_ARRAY_APPEND(vk->alloc, vk->pools, *pool); next_qf: ; } if (!vk->pool_compute && (vk->pool_graphics->props.queueFlags & VK_QUEUE_COMPUTE_BIT)) vk->pool_compute = vk->pool_graphics; vk->ma = vk_malloc_create(vk); if (!vk->ma) goto error; pl_vk->gpu = pl_gpu_create_vk(vk); if 
(!pl_vk->gpu) goto error; // Blacklist / restrict features if (params->max_glsl_version) { struct pl_glsl_version *glsl = (struct pl_glsl_version *) &pl_vk->gpu->glsl; glsl->version = PL_MIN(glsl->version, params->max_glsl_version); PL_INFO(vk, "Restricting GLSL version to %d... new version is %d", params->max_glsl_version, glsl->version); } // Expose the resulting vulkan objects pl_vk->instance = vk->inst; pl_vk->phys_device = vk->physd; pl_vk->device = vk->dev; pl_vk->api_version = vk->api_ver; pl_vk->extensions = vk->exts.elem; pl_vk->num_extensions = vk->exts.num; pl_vk->features = &vk->features; pl_vk->num_queues = vk->pools.num; pl_vk->queues = pl_calloc_ptr(pl_vk, vk->pools.num, pl_vk->queues); struct pl_vulkan_queue *queues = (struct pl_vulkan_queue *) pl_vk->queues; for (int i = 0; i < vk->pools.num; i++) { queues[i] = (struct pl_vulkan_queue) { .index = vk->pools.elem[i]->qf, .count = vk->pools.elem[i]->num_queues, }; if (vk->pools.elem[i] == vk->pool_graphics) pl_vk->queue_graphics = queues[i]; if (vk->pools.elem[i] == vk->pool_compute) pl_vk->queue_compute = queues[i]; if (vk->pools.elem[i] == vk->pool_transfer) pl_vk->queue_transfer = queues[i]; } pl_free(tmp); return pl_vk; error: PL_FATAL(vk, "Failed importing vulkan device"); pl_free(tmp); pl_vulkan_destroy((pl_vulkan *) &pl_vk); return NULL; } libplacebo-v4.192.1/src/vulkan/formats.c000066400000000000000000000443311417677245700201110ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. */ #include "formats.h" #define FMT(_name, num, size, ftype, bits, idx) \ (struct pl_fmt) { \ .name = _name, \ .type = PL_FMT_##ftype, \ .num_components = num, \ .component_depth = bits, \ .internal_size = size, \ .opaque = false, \ .texel_size = size, \ .texel_align = size, \ .host_bits = bits, \ .sample_order = idx, \ } #define IDX(...) {__VA_ARGS__} #define BITS(...)
{__VA_ARGS__} #define REGFMT(name, num, bits, type) \ FMT(name, num, (num) * (bits) / 8, type, \ BITS(bits, bits, bits, bits), \ IDX(0, 1, 2, 3)) #define EMUFMT(_name, in, en, ib, eb, ftype) \ (struct pl_fmt) { \ .name = _name, \ .type = PL_FMT_##ftype, \ .num_components = en, \ .component_depth = BITS(ib, ib, ib, ib),\ .internal_size = (in) * (ib) / 8, \ .opaque = false, \ .emulated = true, \ .texel_size = (en) * (eb) / 8, \ .texel_align = (eb) / 8, \ .host_bits = BITS(eb, eb, eb, eb),\ .sample_order = IDX(0, 1, 2, 3), \ } static const struct vk_format rgb8e = { .tfmt = VK_FORMAT_R8G8B8A8_UNORM, .bfmt = VK_FORMAT_R8G8B8_UNORM, .icomps = 4, .fmt = EMUFMT("rgb8", 4, 3, 8, 8, UNORM), }; static const struct vk_format rgb16e = { .tfmt = VK_FORMAT_R16G16B16A16_UNORM, .bfmt = VK_FORMAT_R16G16B16_UNORM, .icomps = 4, .fmt = EMUFMT("rgb16", 4, 3, 16, 16, UNORM), }; const struct vk_format vk_formats[] = { // Regular, byte-aligned integer formats {VK_FORMAT_R8_UNORM, REGFMT("r8", 1, 8, UNORM)}, {VK_FORMAT_R8G8_UNORM, REGFMT("rg8", 2, 8, UNORM)}, {VK_FORMAT_R8G8B8_UNORM, REGFMT("rgb8", 3, 8, UNORM), .emufmt = &rgb8e}, {VK_FORMAT_R8G8B8A8_UNORM, REGFMT("rgba8", 4, 8, UNORM)}, {VK_FORMAT_R16_UNORM, REGFMT("r16", 1, 16, UNORM)}, {VK_FORMAT_R16G16_UNORM, REGFMT("rg16", 2, 16, UNORM)}, {VK_FORMAT_R16G16B16_UNORM, REGFMT("rgb16", 3, 16, UNORM), .emufmt = &rgb16e}, {VK_FORMAT_R16G16B16A16_UNORM, REGFMT("rgba16", 4, 16, UNORM)}, {VK_FORMAT_R8_SNORM, REGFMT("r8s", 1, 8, SNORM)}, {VK_FORMAT_R8G8_SNORM, REGFMT("rg8s", 2, 8, SNORM)}, {VK_FORMAT_R8G8B8_SNORM, REGFMT("rgb8s", 3, 8, SNORM)}, {VK_FORMAT_R8G8B8A8_SNORM, REGFMT("rgba8s", 4, 8, SNORM)}, {VK_FORMAT_R16_SNORM, REGFMT("r16s", 1, 16, SNORM)}, {VK_FORMAT_R16G16_SNORM, REGFMT("rg16s", 2, 16, SNORM)}, {VK_FORMAT_R16G16B16_SNORM, REGFMT("rgb16s", 3, 16, SNORM)}, {VK_FORMAT_R16G16B16A16_SNORM, REGFMT("rgba16s", 4, 16, SNORM)}, // Float formats (native formats: hf = half float, df = double float) {VK_FORMAT_R16_SFLOAT, REGFMT("r16hf", 1, 16, FLOAT)}, {VK_FORMAT_R16G16_SFLOAT, REGFMT("rg16hf", 2, 16, FLOAT)}, {VK_FORMAT_R16G16B16_SFLOAT, REGFMT("rgb16hf", 3, 16, FLOAT)}, {VK_FORMAT_R16G16B16A16_SFLOAT, REGFMT("rgba16hf", 4, 16, FLOAT)}, {VK_FORMAT_R32_SFLOAT, REGFMT("r32f", 1, 32, FLOAT)}, {VK_FORMAT_R32G32_SFLOAT, REGFMT("rg32f", 2, 32, FLOAT)}, {VK_FORMAT_R32G32B32_SFLOAT, REGFMT("rgb32f", 3, 32, FLOAT)}, {VK_FORMAT_R32G32B32A32_SFLOAT, REGFMT("rgba32f", 4, 32, FLOAT)}, // Float formats (emulated upload/download) {VK_FORMAT_R16_SFLOAT, EMUFMT("r16f", 1, 1, 16, 32, FLOAT)}, {VK_FORMAT_R16G16_SFLOAT, EMUFMT("rg16f", 2, 2, 16, 32, FLOAT)}, {VK_FORMAT_R16G16B16_SFLOAT, EMUFMT("rgb16f", 3, 3, 16, 32, FLOAT)}, {VK_FORMAT_R16G16B16A16_SFLOAT, EMUFMT("rgba16f", 4, 4, 16, 32, FLOAT)}, // Integer-sampled formats {VK_FORMAT_R8_UINT, REGFMT("r8u", 1, 8, UINT)}, {VK_FORMAT_R8G8_UINT, REGFMT("rg8u", 2, 8, UINT)}, {VK_FORMAT_R8G8B8_UINT, REGFMT("rgb8u", 3, 8, UINT)}, {VK_FORMAT_R8G8B8A8_UINT, REGFMT("rgba8u", 4, 8, UINT)}, {VK_FORMAT_R16_UINT, REGFMT("r16u", 1, 16, UINT)}, {VK_FORMAT_R16G16_UINT, REGFMT("rg16u", 2, 16, UINT)}, {VK_FORMAT_R16G16B16_UINT, REGFMT("rgb16u", 3, 16, UINT)}, {VK_FORMAT_R16G16B16A16_UINT, REGFMT("rgba16u", 4, 16, UINT)}, {VK_FORMAT_R32_UINT, REGFMT("r32u", 1, 32, UINT)}, {VK_FORMAT_R32G32_UINT, REGFMT("rg32u", 2, 32, UINT)}, {VK_FORMAT_R32G32B32_UINT, REGFMT("rgb32u", 3, 32, UINT)}, {VK_FORMAT_R32G32B32A32_UINT, REGFMT("rgba32u", 4, 32, UINT)}, {VK_FORMAT_R8_SINT, REGFMT("r8i", 1, 8, SINT)}, {VK_FORMAT_R8G8_SINT, REGFMT("rg8i", 2, 8, SINT)}, 
{VK_FORMAT_R8G8B8_SINT, REGFMT("rgb8i", 3, 8, SINT)}, {VK_FORMAT_R8G8B8A8_SINT, REGFMT("rgba8i", 4, 8, SINT)}, {VK_FORMAT_R16_SINT, REGFMT("r16i", 1, 16, SINT)}, {VK_FORMAT_R16G16_SINT, REGFMT("rg16i", 2, 16, SINT)}, {VK_FORMAT_R16G16B16_SINT, REGFMT("rgb16i", 3, 16, SINT)}, {VK_FORMAT_R16G16B16A16_SINT, REGFMT("rgba16i", 4, 16, SINT)}, {VK_FORMAT_R32_SINT, REGFMT("r32i", 1, 32, SINT)}, {VK_FORMAT_R32G32_SINT, REGFMT("rg32i", 2, 32, SINT)}, {VK_FORMAT_R32G32B32_SINT, REGFMT("rgb32i", 3, 32, SINT)}, {VK_FORMAT_R32G32B32A32_SINT, REGFMT("rgba32i", 4, 32, SINT)}, // "Swapped" component order formats {VK_FORMAT_B8G8R8_UNORM, FMT("bgr8", 3, 3, UNORM, BITS(8, 8, 8), IDX(2, 1, 0))}, {VK_FORMAT_B8G8R8A8_UNORM, FMT("bgra8", 4, 4, UNORM, BITS(8, 8, 8, 8), IDX(2, 1, 0, 3))}, {VK_FORMAT_B8G8R8_UINT, FMT("bgr8u", 3, 3, UINT, BITS(8, 8, 8), IDX(2, 1, 0))}, {VK_FORMAT_B8G8R8A8_UINT, FMT("bgra8u", 4, 4, UINT, BITS(8, 8, 8, 8), IDX(2, 1, 0, 3))}, {VK_FORMAT_B8G8R8_SINT, FMT("bgr8i", 3, 3, SINT, BITS(8, 8, 8), IDX(2, 1, 0))}, {VK_FORMAT_B8G8R8A8_SINT, FMT("bgra8i", 4, 4, SINT, BITS(8, 8, 8, 8), IDX(2, 1, 0, 3))}, // "Packed" integer formats // // Note: These have the component order reversed from what the vulkan name // implies, because we order our IDX from LSB to MSB (consistent with the // usual ordering from lowest byte to highest byte, on little endian // platforms), but Vulkan names them from MSB to LSB. {VK_FORMAT_R4G4_UNORM_PACK8, FMT("gr4", 2, 1, UNORM, BITS(4, 4), IDX(1, 0))}, {VK_FORMAT_B4G4R4A4_UNORM_PACK16, FMT("argb4", 4, 2, UNORM, BITS(4, 4, 4, 4), IDX(3, 0, 1, 2))}, {VK_FORMAT_R4G4B4A4_UNORM_PACK16, FMT("abgr4", 4, 2, UNORM, BITS(4, 4, 4, 4), IDX(3, 2, 1, 0))}, {VK_FORMAT_R5G6B5_UNORM_PACK16, FMT("bgr565", 3, 2, UNORM, BITS(5, 6, 5), IDX(2, 1, 0))}, {VK_FORMAT_B5G6R5_UNORM_PACK16, FMT("rgb565", 3, 2, UNORM, BITS(5, 6, 5), IDX(0, 1, 2))}, {VK_FORMAT_R5G5B5A1_UNORM_PACK16, FMT("a1bgr5", 4, 2, UNORM, BITS(1, 5, 5, 5), IDX(3, 2, 1, 0))}, {VK_FORMAT_B5G5R5A1_UNORM_PACK16, FMT("a1rgb5", 4, 2, UNORM, BITS(1, 5, 5, 5), IDX(3, 0, 1, 2))}, {VK_FORMAT_A1R5G5B5_UNORM_PACK16, FMT("bgr5a1", 4, 2, UNORM, BITS(5, 5, 5, 1), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2B10G10R10_UNORM_PACK32, FMT("rgb10a2", 4, 4, UNORM, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3))}, {VK_FORMAT_A2R10G10B10_UNORM_PACK32, FMT("bgr10a2", 4, 4, UNORM, BITS(10, 10, 10, 2), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2B10G10R10_SNORM_PACK32, FMT("rgb10a2s", 4, 4, SNORM, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3))}, {VK_FORMAT_A2R10G10B10_SNORM_PACK32, FMT("bgr10a2s", 4, 4, SNORM, BITS(10, 10, 10, 2), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2B10G10R10_UINT_PACK32, FMT("rgb10a2u", 4, 4, UINT, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3))}, {VK_FORMAT_A2R10G10B10_UINT_PACK32, FMT("bgr10a2u", 4, 4, UINT, BITS(10, 10, 10, 2), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2B10G10R10_SINT_PACK32, FMT("rgb10a2i", 4, 4, SINT, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3))}, {VK_FORMAT_A2R10G10B10_SINT_PACK32, FMT("bgr10a2i", 4, 4, SINT, BITS(10, 10, 10, 2), IDX(2, 1, 0, 3))}, {0} }; #undef BITS #undef IDX #undef REGFMT #undef FMT void vk_setup_formats(struct pl_gpu *gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; PL_ARRAY(pl_fmt) formats = {0}; // Texture format emulation requires at least support for texel buffers bool has_emu = gpu->glsl.compute && gpu->limits.max_buffer_texels; for (const struct vk_format *pvk_fmt = vk_formats; pvk_fmt->tfmt; pvk_fmt++) { const struct vk_format *vk_fmt = pvk_fmt; // Skip formats with innately emulated representation if unsupported if (vk_fmt->fmt.emulated && 
!has_emu) continue; // Suppress some errors/warnings spit out by the format probing code pl_log_level_cap(vk->log, PL_LOG_INFO); bool has_drm_mods = vk->GetImageDrmFormatModifierPropertiesEXT; VkDrmFormatModifierPropertiesEXT modifiers[16] = {0}; VkDrmFormatModifierPropertiesListEXT drm_props = { .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, .drmFormatModifierCount = PL_ARRAY_SIZE(modifiers), .pDrmFormatModifierProperties = modifiers, }; VkFormatProperties2KHR prop2 = { .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, .pNext = has_drm_mods ? &drm_props : NULL, }; vk->GetPhysicalDeviceFormatProperties2KHR(vk->physd, vk_fmt->tfmt, &prop2); // If wholly unsupported, try falling back to the emulation formats // for texture operations VkFormatProperties *prop = &prop2.formatProperties; while (has_emu && !prop->optimalTilingFeatures && vk_fmt->emufmt) { vk_fmt = vk_fmt->emufmt; vk->GetPhysicalDeviceFormatProperties2KHR(vk->physd, vk_fmt->tfmt, &prop2); } VkFormatFeatureFlags texflags = prop->optimalTilingFeatures; VkFormatFeatureFlags bufflags = prop->bufferFeatures; if (vk_fmt->fmt.emulated) { // Emulated formats might have a different buffer representation // than their texture representation. If they don't, assume their // buffer representation is nonsensical (e.g. r16f) if (vk_fmt->bfmt) { vk->GetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->bfmt, prop); bufflags = prop->bufferFeatures; } else { bufflags = 0; } } pl_log_level_cap(vk->log, PL_LOG_NONE); struct pl_fmt *fmt = pl_alloc_obj(gpu, fmt, struct pl_fmt_vk); struct pl_fmt_vk *fmtp = PL_PRIV(fmt); *fmt = vk_fmt->fmt; *fmtp = (struct pl_fmt_vk) { .vk_fmt = vk_fmt }; // Always set the signature to the actual texture format, so we can use // it to guarantee renderpass compatibility. 
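// (For emulated formats, `vk_fmt` may have been swapped to its emulation
// format above, so the signature reflects the VkFormat actually backing the
// texture.)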
fmt->signature = (uint64_t) vk_fmt->tfmt; // For sanity, clear the superfluous fields for (int i = fmt->num_components; i < 4; i++) { fmt->component_depth[i] = 0; fmt->sample_order[i] = 0; fmt->host_bits[i] = 0; } // We can set this universally fmt->fourcc = pl_fmt_fourcc(fmt); if (has_drm_mods) { if (drm_props.drmFormatModifierCount == PL_ARRAY_SIZE(modifiers)) { PL_WARN(gpu, "DRM modifier list for format %s possibly truncated", fmt->name); } // Query the list of supported DRM modifiers from the driver PL_ARRAY(uint64_t) modlist = {0}; for (int i = 0; i < drm_props.drmFormatModifierCount; i++) { if (modifiers[i].drmFormatModifierPlaneCount > 1) { PL_DEBUG(gpu, "Ignoring format modifier %s of " "format %s because its plane count %d > 1", PRINT_DRM_MOD(modifiers[i].drmFormatModifier), fmt->name, modifiers[i].drmFormatModifierPlaneCount); continue; } // Only warn about texture format features relevant to us const VkFormatFeatureFlags flag_mask = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; VkFormatFeatureFlags flags = modifiers[i].drmFormatModifierTilingFeatures; if ((flags & flag_mask) != (texflags & flag_mask)) { PL_INFO(gpu, "DRM format modifier %s of format %s " "supports fewer caps (0x%"PRIx32") than optimal tiling " "(0x%"PRIx32"), may result in limited capability!", PRINT_DRM_MOD(modifiers[i].drmFormatModifier), fmt->name, flags, texflags); } PL_ARRAY_APPEND(fmt, modlist, modifiers[i].drmFormatModifier); } fmt->num_modifiers = modlist.num; fmt->modifiers = modlist.elem; } else if (gpu->export_caps.tex & PL_HANDLE_DMA_BUF) { // Hard-code a list of static mods that we're likely to support static const uint64_t static_mods[2] = { DRM_FORMAT_MOD_INVALID, DRM_FORMAT_MOD_LINEAR, }; fmt->num_modifiers = PL_ARRAY_SIZE(static_mods); fmt->modifiers = static_mods; } struct { VkFormatFeatureFlags flags; enum pl_fmt_caps caps; } bufbits[] = { {VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT, PL_FMT_CAP_VERTEX}, {VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT, PL_FMT_CAP_TEXEL_UNIFORM}, {VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT, PL_FMT_CAP_TEXEL_STORAGE}, }; for (int i = 0; i < PL_ARRAY_SIZE(bufbits); i++) { if ((bufflags & bufbits[i].flags) == bufbits[i].flags) fmt->caps |= bufbits[i].caps; } if (fmt->caps) { fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); pl_assert(fmt->glsl_type); } struct { VkFormatFeatureFlags flags; enum pl_fmt_caps caps; } bits[] = { {VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT, PL_FMT_CAP_BLENDABLE}, {VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT, PL_FMT_CAP_LINEAR}, {VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT, PL_FMT_CAP_SAMPLEABLE}, {VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT, PL_FMT_CAP_STORABLE}, {VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT, PL_FMT_CAP_RENDERABLE}, // We don't distinguish between the two blit modes for pl_fmt_caps {VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT, PL_FMT_CAP_BLITTABLE}, }; for (int i = 0; i < PL_ARRAY_SIZE(bits); i++) { if ((texflags & bits[i].flags) == bits[i].flags) fmt->caps |= bits[i].caps; } // For blit emulation via compute shaders if (!(fmt->caps & PL_FMT_CAP_BLITTABLE) && (fmt->caps & PL_FMT_CAP_STORABLE)) { fmt->caps |= PL_FMT_CAP_BLITTABLE; fmtp->blit_emulated = true; } // This is technically supported for all textures, but the semantics // of pl_gpu require it only be listed for non-opaque 
ones if (!fmt->opaque) fmt->caps |= PL_FMT_CAP_HOST_READABLE; // Vulkan requires a minimum GLSL version that supports textureGather() if (fmt->caps & PL_FMT_CAP_SAMPLEABLE) fmt->gatherable = true; // Disable implied capabilities where the dependencies are unavailable enum pl_fmt_caps storable = PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE; if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE)) fmt->caps &= ~PL_FMT_CAP_LINEAR; if (!gpu->glsl.compute) fmt->caps &= ~storable; bool has_nofmt = vk->features.features.shaderStorageImageReadWithoutFormat && vk->features.features.shaderStorageImageWriteWithoutFormat; if (fmt->caps & storable) { int real_comps = PL_DEF(vk_fmt->icomps, fmt->num_components); fmt->glsl_format = pl_fmt_glsl_format(fmt, real_comps); if (!fmt->glsl_format && !has_nofmt) { PL_WARN(gpu, "Storable format '%s' has no matching GLSL " "format qualifier but read/write without format " "is not supported.. disabling", fmt->name); fmt->caps &= ~storable; } } if (fmt->caps & storable) fmt->caps |= PL_FMT_CAP_READWRITE; PL_ARRAY_APPEND(gpu, formats, fmt); } gpu->formats = formats.elem; gpu->num_formats = formats.num; } libplacebo-v4.192.1/src/vulkan/formats.h000066400000000000000000000024221417677245700201110ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include "gpu.h" struct vk_format { VkFormat tfmt; // internal vulkan format enum (textures) struct pl_fmt fmt; // pl_fmt template (features will be auto-detected) int icomps; // internal component count (or 0 to infer from `fmt`) VkFormat bfmt; // vulkan format for use as buffers (or 0 to use `tfmt`) const struct vk_format *emufmt; // alternate format for emulation }; extern const struct vk_format vk_formats[]; // Add all supported formats to the `pl_gpu` format list void vk_setup_formats(struct pl_gpu *gpu); libplacebo-v4.192.1/src/vulkan/gpu.c000066400000000000000000000624421417677245700172340ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" #include "formats.h" #include "glsl/spirv.h" #ifdef PL_HAVE_UNIX #include #endif // Gives us enough queries for 8 results #define QUERY_POOL_SIZE 16 struct pl_timer { VkQueryPool qpool; // even=start, odd=stop int index_write; // next index to write to int index_read; // next index to read from uint64_t pending; // bitmask of queries that are still running }; static inline uint64_t timer_bit(int index) { return 1llu << (index / 2); } static void timer_destroy_cb(pl_gpu gpu, pl_timer timer) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_assert(!timer->pending); vk->DestroyQueryPool(vk->dev, timer->qpool, PL_VK_ALLOC); pl_free(timer); } static pl_timer vk_timer_create(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_timer timer = pl_alloc_ptr(NULL, timer); *timer = (struct pl_timer) {0}; struct VkQueryPoolCreateInfo qinfo = { .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, .queryType = VK_QUERY_TYPE_TIMESTAMP, .queryCount = QUERY_POOL_SIZE, }; VK(vk->CreateQueryPool(vk->dev, &qinfo, PL_VK_ALLOC, &timer->qpool)); return timer; error: timer_destroy_cb(gpu, timer); return NULL; } static void vk_timer_destroy(pl_gpu gpu, pl_timer timer) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_mutex_lock(&p->recording); if (p->cmd) { vk_cmd_callback(p->cmd, (vk_cb) timer_destroy_cb, gpu, timer); } else { vk_dev_callback(vk, (vk_cb) timer_destroy_cb, gpu, timer); } pl_mutex_unlock(&p->recording); } static uint64_t vk_timer_query(pl_gpu gpu, pl_timer timer) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; if (timer->index_read == timer->index_write) return 0; // no more unprocessed results vk_poll_commands(vk, 0); if (timer->pending & timer_bit(timer->index_read)) return 0; // still waiting for results VkResult res; uint64_t ts[2] = {0}; res = vk->GetQueryPoolResults(vk->dev, timer->qpool, timer->index_read, 2, sizeof(ts), &ts[0], sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); switch (res) { case VK_SUCCESS: timer->index_read = (timer->index_read + 2) % QUERY_POOL_SIZE; return (ts[1] - ts[0]) * vk->limits.timestampPeriod; case VK_NOT_READY: return 0; default: PL_VK_ASSERT(res, "Retrieving query pool results"); } error: return 0; } static void timer_begin(pl_gpu gpu, struct vk_cmd *cmd, pl_timer timer) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; if (!timer) return; if (!cmd->pool->props.timestampValidBits) { PL_TRACE(gpu, "QF %d does not support timestamp queries", cmd->pool->qf); return; } vk_poll_commands(vk, 0); if (timer->pending & timer_bit(timer->index_write)) return; // next query is still running, skip this timer VkQueueFlags reset_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; if (cmd->pool->props.queueFlags & reset_flags) { // Use direct command buffer resets vk->CmdResetQueryPool(cmd->buf, timer->qpool, timer->index_write, 2); } else if (p->host_query_reset) { // Use host query resets vk->ResetQueryPoolEXT(vk->dev, timer->qpool, timer->index_write, 2); } else { PL_TRACE(gpu, "QF %d supports no mechanism for resetting queries", cmd->pool->qf); return; } vk->CmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, timer->qpool, timer->index_write); p->cmd_timer = timer; } static inline bool supports_marks(struct vk_cmd *cmd) { // Spec says debug markers are only available on graphics/compute queues VkQueueFlags flags = cmd->pool->props.queueFlags; return flags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT); } struct vk_cmd *_begin_cmd(pl_gpu gpu, enum queue_type type, const 
char *label, pl_timer timer) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_mutex_lock(&p->recording); struct vk_cmdpool *pool; switch (type) { case ANY: pool = p->cmd ? p->cmd->pool : vk->pool_graphics; break; case GRAPHICS: pool = vk->pool_graphics; break; case COMPUTE: pool = vk->pool_compute; break; // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec) case TRANSFER: pool = vk->pool_transfer; if (!pool) pool = vk->pool_compute; if (!pool) pool = vk->pool_graphics; break; default: pl_unreachable(); } if (!p->cmd || p->cmd->pool != pool) { vk_cmd_submit(vk, &p->cmd); p->cmd = vk_cmd_begin(vk, pool); if (!p->cmd) { pl_mutex_unlock(&p->recording); return NULL; } } if (vk->CmdBeginDebugUtilsLabelEXT && supports_marks(p->cmd)) { vk->CmdBeginDebugUtilsLabelEXT(p->cmd->buf, &(VkDebugUtilsLabelEXT) { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, .pLabelName = label, }); } timer_begin(gpu, p->cmd, timer); return p->cmd; } static void timer_end_cb(void *ptimer, void *pindex) { pl_timer timer = ptimer; int index = (uintptr_t) pindex; timer->pending &= ~timer_bit(index); } bool _end_cmd(pl_gpu gpu, struct vk_cmd **pcmd, bool submit) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; bool ret = true; if (!pcmd) { if (submit) { pl_mutex_lock(&p->recording); ret = vk_cmd_submit(p->vk, &p->cmd); pl_mutex_unlock(&p->recording); } return ret; } struct vk_cmd *cmd = *pcmd; pl_assert(p->cmd == cmd); if (p->cmd_timer) { pl_timer timer = p->cmd_timer; vk->CmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, timer->qpool, timer->index_write + 1); timer->pending |= timer_bit(timer->index_write); vk_cmd_callback(cmd, (vk_cb) timer_end_cb, timer, (void *) (uintptr_t) timer->index_write); timer->index_write = (timer->index_write + 2) % QUERY_POOL_SIZE; if (timer->index_write == timer->index_read) { // forcibly drop the least recent result to make space timer->index_read = (timer->index_read + 2) % QUERY_POOL_SIZE; } p->cmd_timer = NULL; } if (vk->CmdEndDebugUtilsLabelEXT && supports_marks(cmd)) vk->CmdEndDebugUtilsLabelEXT(cmd->buf); if (submit) ret = vk_cmd_submit(vk, &p->cmd); pl_mutex_unlock(&p->recording); return ret; } static void vk_gpu_destroy(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_dispatch_destroy(&p->dp); vk_cmd_submit(vk, &p->cmd); vk_wait_idle(vk); for (enum pl_tex_sample_mode s = 0; s < PL_TEX_SAMPLE_MODE_COUNT; s++) { for (enum pl_tex_address_mode a = 0; a < PL_TEX_ADDRESS_MODE_COUNT; a++) vk->DestroySampler(vk->dev, p->samplers[s][a], PL_VK_ALLOC); } spirv_compiler_destroy(&p->spirv); pl_mutex_destroy(&p->recording); pl_free((void *) gpu); } pl_vulkan pl_vulkan_get(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->destroy == vk_gpu_destroy) { struct pl_vk *p = (struct pl_vk *) impl; return p->vk->vulkan; } return NULL; } static pl_handle_caps vk_sync_handle_caps(struct vk_ctx *vk) { pl_handle_caps caps = 0; for (int i = 0; vk_sync_handle_list[i]; i++) { enum pl_handle_type type = vk_sync_handle_list[i]; VkPhysicalDeviceExternalSemaphoreInfo info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO_KHR, .handleType = vk_sync_handle_type(type), }; VkExternalSemaphoreProperties props = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES_KHR, }; vk->GetPhysicalDeviceExternalSemaphoreProperties(vk->physd, &info, &props); VkExternalSemaphoreFeatureFlags flags = props.externalSemaphoreFeatures; if ((props.compatibleHandleTypes & info.handleType) && (flags & 
VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR)) { caps |= type; } } return caps; } static pl_handle_caps vk_tex_handle_caps(struct vk_ctx *vk, bool import) { pl_handle_caps caps = 0; bool has_drm_mods = vk->GetImageDrmFormatModifierPropertiesEXT; for (int i = 0; vk_mem_handle_list[i]; i++) { enum pl_handle_type handle_type = vk_mem_handle_list[i]; // Query whether creation of a "basic" dummy texture would work VkPhysicalDeviceImageDrmFormatModifierInfoEXT drm_pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, .drmFormatModifier = DRM_FORMAT_MOD_LINEAR, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; VkPhysicalDeviceExternalImageFormatInfoKHR ext_pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHR, .handleType = vk_mem_handle_type(handle_type), }; VkPhysicalDeviceImageFormatInfo2KHR pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, .pNext = &ext_pinfo, .format = VK_FORMAT_R8_UNORM, .type = VK_IMAGE_TYPE_2D, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, }; if (handle_type == PL_HANDLE_DMA_BUF && has_drm_mods) { vk_link_struct(&pinfo, &drm_pinfo); pinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; } VkExternalImageFormatPropertiesKHR ext_props = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, }; VkImageFormatProperties2KHR props = { .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, .pNext = &ext_props, }; VkResult res; res = vk->GetPhysicalDeviceImageFormatProperties2KHR(vk->physd, &pinfo, &props); if (res != VK_SUCCESS && handle_type == PL_HANDLE_DMA_BUF && !has_drm_mods) { // Try again with VK_IMAGE_TILING_LINEAR, as a dumb hack pinfo.tiling = VK_IMAGE_TILING_LINEAR; res = vk->GetPhysicalDeviceImageFormatProperties2KHR(vk->physd, &pinfo, &props); } if (res != VK_SUCCESS) { PL_DEBUG(vk, "Tex caps for %s (0x%x) unsupported: %s", vk_handle_name(ext_pinfo.handleType), (unsigned int) handle_type, vk_res_str(res)); continue; } if (vk_external_mem_check(vk, &ext_props.externalMemoryProperties, handle_type, import)) { caps |= handle_type; } } return caps; } static const VkFilter filters[PL_TEX_SAMPLE_MODE_COUNT] = { [PL_TEX_SAMPLE_NEAREST] = VK_FILTER_NEAREST, [PL_TEX_SAMPLE_LINEAR] = VK_FILTER_LINEAR, }; static const struct pl_gpu_fns pl_fns_vk; pl_gpu pl_gpu_create_vk(struct vk_ctx *vk) { pl_assert(vk->dev); struct pl_gpu *gpu = pl_zalloc_obj(NULL, gpu, struct pl_vk); gpu->log = vk->log; gpu->ctx = gpu->log; struct pl_vk *p = PL_PRIV(gpu); pl_mutex_init(&p->recording); p->impl = pl_fns_vk; p->vk = vk; p->spirv = spirv_compiler_create(vk->log); if (!p->spirv) goto error; // Query all device properties VkPhysicalDevicePCIBusInfoPropertiesEXT pci_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT, }; VkPhysicalDeviceIDPropertiesKHR id_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR, .pNext = &pci_props, }; VkPhysicalDevicePushDescriptorPropertiesKHR pushd_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR, .pNext = &id_props, }; VkPhysicalDeviceSubgroupProperties group_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES, .pNext = &pushd_props, }; VkPhysicalDeviceExternalMemoryHostPropertiesEXT host_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, .pNext = &group_props, }; VkPhysicalDeviceProperties2KHR props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, .pNext = &host_props, }; 
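// All of the property structs above are linked into a single pNext chain
// (props -> host_props -> group_props -> pushd_props -> id_props -> pci_props),
// so the single GetPhysicalDeviceProperties2() call below fills them all in.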
#ifdef VK_KHR_portability_subset VkPhysicalDevicePortabilitySubsetPropertiesKHR port_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_PROPERTIES_KHR, .minVertexInputBindingStrideAlignment = 1, }; vk_link_struct(&props, &port_props); #endif vk->GetPhysicalDeviceProperties2(vk->physd, &props); // Determine GLSL features and limits gpu->glsl = (struct pl_glsl_version) { .version = 450, .vulkan = true, }; if (vk->pool_compute) { gpu->glsl.compute = true; gpu->glsl.max_shmem_size = vk->limits.maxComputeSharedMemorySize; gpu->glsl.max_group_threads = vk->limits.maxComputeWorkGroupInvocations; for (int i = 0; i < 3; i++) gpu->glsl.max_group_size[i] = vk->limits.maxComputeWorkGroupSize[i]; } VkShaderStageFlags req_stages = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT; VkSubgroupFeatureFlags req_flags = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT; if ((group_props.supportedStages & req_stages) == req_stages && (group_props.supportedOperations & req_flags) == req_flags) { gpu->glsl.subgroup_size = group_props.subgroupSize; } if (vk->features.features.shaderImageGatherExtended) { gpu->glsl.min_gather_offset = vk->limits.minTexelGatherOffset; gpu->glsl.max_gather_offset = vk->limits.maxTexelGatherOffset; } gpu->limits = (struct pl_gpu_limits) { // pl_gpu .thread_safe = true, .callbacks = true, // pl_buf .max_buf_size = SIZE_MAX, // no limit imposed by vulkan .max_ubo_size = vk->limits.maxUniformBufferRange, .max_ssbo_size = vk->limits.maxStorageBufferRange, .max_vbo_size = SIZE_MAX, .max_mapped_size = SIZE_MAX, .max_buffer_texels = vk->limits.maxTexelBufferElements, .align_host_ptr = host_props.minImportedHostPointerAlignment, // pl_tex .max_tex_1d_dim = vk->limits.maxImageDimension1D, .max_tex_2d_dim = vk->limits.maxImageDimension2D, .max_tex_3d_dim = vk->limits.maxImageDimension3D, .blittable_1d_3d = true, .buf_transfer = true, .align_tex_xfer_pitch = vk->limits.optimalBufferCopyRowPitchAlignment, .align_tex_xfer_offset = pl_lcm(vk->limits.optimalBufferCopyOffsetAlignment, 4), // pl_pass .max_variable_comps = 0, // vulkan doesn't support these at all .max_constants = SIZE_MAX, .max_pushc_size = vk->limits.maxPushConstantsSize, #ifdef VK_KHR_portability_subset .align_vertex_stride = port_props.minVertexInputBindingStrideAlignment, #else .align_vertex_stride = 1, #endif .max_dispatch = { vk->limits.maxComputeWorkGroupCount[0], vk->limits.maxComputeWorkGroupCount[1], vk->limits.maxComputeWorkGroupCount[2], }, .fragment_queues = vk->pool_graphics->num_queues, .compute_queues = vk->pool_compute ? 
vk->pool_compute->num_queues : 0, }; gpu->export_caps.buf = vk_malloc_handle_caps(vk->ma, false); gpu->import_caps.buf = vk_malloc_handle_caps(vk->ma, true); gpu->export_caps.tex = vk_tex_handle_caps(vk, false); gpu->import_caps.tex = vk_tex_handle_caps(vk, true); gpu->export_caps.sync = vk_sync_handle_caps(vk); gpu->import_caps.sync = 0; // Not supported yet if (pl_gpu_supports_interop(gpu)) { pl_static_assert(sizeof(gpu->uuid) == VK_UUID_SIZE); memcpy(gpu->uuid, id_props.deviceUUID, sizeof(gpu->uuid)); gpu->pci.domain = pci_props.pciDomain; gpu->pci.bus = pci_props.pciBus; gpu->pci.device = pci_props.pciDevice; gpu->pci.function = pci_props.pciFunction; } if (vk->CmdPushDescriptorSetKHR) p->max_push_descriptors = pushd_props.maxPushDescriptors; if (vk->ResetQueryPoolEXT) { const VkPhysicalDeviceHostQueryResetFeatures *host_query_reset; host_query_reset = vk_find_struct(&vk->features, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES); if (host_query_reset) p->host_query_reset = host_query_reset->hostQueryReset; } vk_setup_formats(gpu); // Compute the correct minimum texture alignment p->min_texel_alignment = 1; for (int i = 0; i < gpu->num_formats; i++) { if (gpu->formats[i]->emulated) continue; size_t texel_size = gpu->formats[i]->texel_size; p->min_texel_alignment = pl_lcm(p->min_texel_alignment, texel_size); } PL_DEBUG(gpu, "Minimum texel alignment: %zu", p->min_texel_alignment); // Initialize the samplers for (enum pl_tex_sample_mode s = 0; s < PL_TEX_SAMPLE_MODE_COUNT; s++) { for (enum pl_tex_address_mode a = 0; a < PL_TEX_ADDRESS_MODE_COUNT; a++) { static const VkSamplerAddressMode modes[PL_TEX_ADDRESS_MODE_COUNT] = { [PL_TEX_ADDRESS_CLAMP] = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, [PL_TEX_ADDRESS_REPEAT] = VK_SAMPLER_ADDRESS_MODE_REPEAT, [PL_TEX_ADDRESS_MIRROR] = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, }; VkSamplerCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .magFilter = filters[s], .minFilter = filters[s], .addressModeU = modes[a], .addressModeV = modes[a], .addressModeW = modes[a], .maxAnisotropy = 1.0, }; VK(vk->CreateSampler(vk->dev, &sinfo, PL_VK_ALLOC, &p->samplers[s][a])); } } // Create the dispatch last, after any setup of `gpu` is done p->dp = pl_dispatch_create(vk->log, gpu); return pl_gpu_finalize(gpu); error: vk_gpu_destroy(gpu); return NULL; } static void vk_sync_destroy(pl_gpu gpu, pl_sync sync) { if (!sync) return; struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_sync_vk *sync_vk = PL_PRIV(sync); #ifdef PL_HAVE_UNIX if (sync->handle_type == PL_HANDLE_FD) { if (sync->wait_handle.fd > -1) close(sync->wait_handle.fd); if (sync->signal_handle.fd > -1) close(sync->signal_handle.fd); } #endif #ifdef PL_HAVE_WIN32 if (sync->handle_type == PL_HANDLE_WIN32) { if (sync->wait_handle.handle != NULL) CloseHandle(sync->wait_handle.handle); if (sync->signal_handle.handle != NULL) CloseHandle(sync->signal_handle.handle); } // PL_HANDLE_WIN32_KMT is just an identifier. It doesn't get closed. 
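    // (A PL_HANDLE_WIN32_KMT export is a global kernel-mode identifier rather
    //  than an NT handle, so unlike PL_HANDLE_WIN32 there is no reference for
    //  the pl_sync to release here.)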
#endif vk->DestroySemaphore(vk->dev, sync_vk->wait, PL_VK_ALLOC); vk->DestroySemaphore(vk->dev, sync_vk->signal, PL_VK_ALLOC); pl_free((void *) sync); } void vk_sync_deref(pl_gpu gpu, pl_sync sync) { if (!sync) return; struct pl_sync_vk *sync_vk = PL_PRIV(sync); if (pl_rc_deref(&sync_vk->rc)) vk_sync_destroy(gpu, sync); } static pl_sync vk_sync_create(pl_gpu gpu, enum pl_handle_type handle_type) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_sync *sync = pl_zalloc_obj(NULL, sync, struct pl_sync_vk); sync->handle_type = handle_type; struct pl_sync_vk *sync_vk = PL_PRIV(sync); pl_rc_init(&sync_vk->rc); VkExportSemaphoreCreateInfoKHR einfo = { .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR, .handleTypes = vk_sync_handle_type(handle_type), }; switch (handle_type) { case PL_HANDLE_FD: sync->wait_handle.fd = -1; sync->signal_handle.fd = -1; break; case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: sync->wait_handle.handle = NULL; sync->signal_handle.handle = NULL; break; case PL_HANDLE_DMA_BUF: case PL_HANDLE_HOST_PTR: pl_unreachable(); } const VkSemaphoreCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, .pNext = &einfo, }; VK(vk->CreateSemaphore(vk->dev, &sinfo, PL_VK_ALLOC, &sync_vk->wait)); VK(vk->CreateSemaphore(vk->dev, &sinfo, PL_VK_ALLOC, &sync_vk->signal)); PL_VK_NAME(SEMAPHORE, sync_vk->wait, "sync wait"); PL_VK_NAME(SEMAPHORE, sync_vk->signal, "sync signal"); #ifdef PL_HAVE_UNIX if (handle_type == PL_HANDLE_FD) { VkSemaphoreGetFdInfoKHR finfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, .semaphore = sync_vk->wait, .handleType = einfo.handleTypes, }; VK(vk->GetSemaphoreFdKHR(vk->dev, &finfo, &sync->wait_handle.fd)); finfo.semaphore = sync_vk->signal; VK(vk->GetSemaphoreFdKHR(vk->dev, &finfo, &sync->signal_handle.fd)); } #endif #ifdef PL_HAVE_WIN32 if (handle_type == PL_HANDLE_WIN32 || handle_type == PL_HANDLE_WIN32_KMT) { VkSemaphoreGetWin32HandleInfoKHR handle_info = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR, .semaphore = sync_vk->wait, .handleType = einfo.handleTypes, }; VK(vk->GetSemaphoreWin32HandleKHR(vk->dev, &handle_info, &sync->wait_handle.handle)); handle_info.semaphore = sync_vk->signal; VK(vk->GetSemaphoreWin32HandleKHR(vk->dev, &handle_info, &sync->signal_handle.handle)); } #endif return sync; error: vk_sync_destroy(gpu, sync); return NULL; } static void vk_gpu_flush(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; CMD_SUBMIT(NULL); vk_rotate_queues(vk); vk_malloc_garbage_collect(vk->ma); } static void vk_gpu_finish(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; CMD_SUBMIT(NULL); vk_wait_idle(vk); } static bool vk_gpu_is_failed(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; return vk->failed; } struct vk_cmd *pl_vk_steal_cmd(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_mutex_lock(&p->recording); struct vk_cmd *cmd = p->cmd; p->cmd = NULL; pl_mutex_unlock(&p->recording); struct vk_cmdpool *pool = vk->pool_graphics; if (!cmd || cmd->pool != pool) { vk_cmd_submit(vk, &cmd); cmd = vk_cmd_begin(vk, pool); } return cmd; } void pl_vk_print_heap(pl_gpu gpu, enum pl_log_level lev) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; vk_malloc_print_stats(vk->ma, lev); } static const struct pl_gpu_fns pl_fns_vk = { .destroy = vk_gpu_destroy, .tex_create = vk_tex_create, .tex_destroy = vk_tex_deref, .tex_invalidate = vk_tex_invalidate, .tex_clear_ex = vk_tex_clear_ex, .tex_blit = 
vk_tex_blit, .tex_upload = vk_tex_upload, .tex_download = vk_tex_download, .tex_poll = vk_tex_poll, .tex_export = vk_tex_export, .buf_create = vk_buf_create, .buf_destroy = vk_buf_deref, .buf_write = vk_buf_write, .buf_read = vk_buf_read, .buf_copy = vk_buf_copy, .buf_export = vk_buf_export, .buf_poll = vk_buf_poll, .desc_namespace = vk_desc_namespace, .pass_create = vk_pass_create, .pass_destroy = vk_pass_destroy, .pass_run = vk_pass_run, .sync_create = vk_sync_create, .sync_destroy = vk_sync_deref, .timer_create = vk_timer_create, .timer_destroy = vk_timer_destroy, .timer_query = vk_timer_query, .gpu_flush = vk_gpu_flush, .gpu_finish = vk_gpu_finish, .gpu_is_failed = vk_gpu_is_failed, }; libplacebo-v4.192.1/src/vulkan/gpu.h000066400000000000000000000126421417677245700172360ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include "command.h" #include "formats.h" #include "malloc.h" #include "utils.h" #include "../gpu.h" #include "../pl_thread.h" pl_gpu pl_gpu_create_vk(struct vk_ctx *vk); // This function takes the current graphics command and steals it from the // GPU, so the caller can do custom vk_cmd_ calls on it. The caller should // submit it as well. struct vk_cmd *pl_vk_steal_cmd(pl_gpu gpu); // Print memory usage statistics void pl_vk_print_heap(pl_gpu, enum pl_log_level); // --- pl_gpu internal structs and helpers struct pl_fmt_vk { const struct vk_format *vk_fmt; bool blit_emulated; }; enum queue_type { GRAPHICS, COMPUTE, TRANSFER, ANY, }; struct pl_vk { struct pl_gpu_fns impl; struct vk_ctx *vk; struct spirv_compiler *spirv; // Some additional cached device limits and features checks uint32_t max_push_descriptors; size_t min_texel_alignment; bool host_query_reset; // This is a pl_dispatch used (on ourselves!) for the purposes of // dispatching compute shaders for performing various emulation tasks // (e.g. partial clears, blits or emulated texture transfers). // Warning: Care must be taken to avoid recursive calls. pl_dispatch dp; // The "currently recording" command. This will be queued and replaced by // a new command every time we need to "switch" between queue families. 
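    // Since pl_gpu_limits.thread_safe is advertised, this mutex guards the
    // currently recording command below; e.g. pl_vk_steal_cmd() follows
    // roughly this pattern (illustrative):
    //
    //     pl_mutex_lock(&p->recording);
    //     struct vk_cmd *cmd = p->cmd;
    //     p->cmd = NULL;
    //     pl_mutex_unlock(&p->recording);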
pl_mutex recording; struct vk_cmd *cmd; pl_timer cmd_timer; // Array of VkSamplers for every combination of sample/address modes VkSampler samplers[PL_TEX_SAMPLE_MODE_COUNT][PL_TEX_ADDRESS_MODE_COUNT]; // To avoid spamming warnings bool warned_modless; }; struct vk_cmd *_begin_cmd(pl_gpu, enum queue_type, const char *label, pl_timer); bool _end_cmd(pl_gpu, struct vk_cmd **, bool submit); #define CMD_BEGIN(type) _begin_cmd(gpu, type, __func__, NULL) #define CMD_BEGIN_TIMED(type, timer) _begin_cmd(gpu, type, __func__, timer) #define CMD_FINISH(cmd) _end_cmd(gpu, cmd, false) #define CMD_SUBMIT(cmd) _end_cmd(gpu, cmd, true) struct pl_tex_vk { pl_rc_t rc; bool external_img; enum queue_type transfer_queue; VkImageType type; VkImage img; struct vk_memslice mem; // cached properties VkFormat img_fmt; VkImageUsageFlags usage_flags; // for sampling VkImageView view; // for rendering VkFramebuffer framebuffer; // for vk_tex_upload/download fallback code pl_fmt texel_fmt; // synchronization and current state struct vk_sem sem; VkImageLayout layout; PL_ARRAY(pl_vulkan_sem) ext_deps; // external semaphore, not owned by the pl_tex pl_sync ext_sync; // indicates an exported image bool may_invalidate; bool held; }; pl_tex vk_tex_create(pl_gpu, const struct pl_tex_params *); void vk_tex_deref(pl_gpu, pl_tex); void vk_tex_invalidate(pl_gpu, pl_tex); void vk_tex_clear_ex(pl_gpu, pl_tex, const union pl_clear_color); void vk_tex_blit(pl_gpu, const struct pl_tex_blit_params *); bool vk_tex_upload(pl_gpu, const struct pl_tex_transfer_params *); bool vk_tex_download(pl_gpu, const struct pl_tex_transfer_params *); bool vk_tex_poll(pl_gpu, pl_tex, uint64_t timeout); bool vk_tex_export(pl_gpu, pl_tex, pl_sync); void vk_tex_barrier(pl_gpu, struct vk_cmd *, pl_tex, VkPipelineStageFlags, VkAccessFlags, VkImageLayout, bool export); struct pl_buf_vk { pl_rc_t rc; struct vk_memslice mem; enum queue_type update_queue; VkBufferView view; // for texel buffers // synchronization and current state struct vk_sem sem; bool exported; bool needs_flush; }; pl_buf vk_buf_create(pl_gpu, const struct pl_buf_params *); void vk_buf_deref(pl_gpu, pl_buf); void vk_buf_write(pl_gpu, pl_buf, size_t offset, const void *src, size_t size); bool vk_buf_read(pl_gpu, pl_buf, size_t offset, void *dst, size_t size); void vk_buf_copy(pl_gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size); bool vk_buf_export(pl_gpu, pl_buf); bool vk_buf_poll(pl_gpu, pl_buf, uint64_t timeout); // Helper to ease buffer barrier creation. (`offset` is relative to pl_buf) void vk_buf_barrier(pl_gpu, struct vk_cmd *, pl_buf, VkPipelineStageFlags, VkAccessFlags, size_t offset, size_t size, bool export); // Flush visible writes to a buffer made by the API void vk_buf_flush(pl_gpu, struct vk_cmd *, pl_buf, size_t offset, size_t size); struct pl_pass_vk; int vk_desc_namespace(pl_gpu, enum pl_desc_type); pl_pass vk_pass_create(pl_gpu, const struct pl_pass_params *); void vk_pass_destroy(pl_gpu, pl_pass); void vk_pass_run(pl_gpu, const struct pl_pass_run_params *); struct pl_sync_vk { pl_rc_t rc; VkSemaphore wait; VkSemaphore signal; }; void vk_sync_deref(pl_gpu, pl_sync); libplacebo-v4.192.1/src/vulkan/gpu_buf.c000066400000000000000000000401351417677245700200630ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" void vk_buf_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_buf buf, VkPipelineStageFlags stage, VkAccessFlags access, size_t offset, size_t size, bool export) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); pl_assert(!export || !buf_vk->exported); // can't re-export exported buffers pl_rc_ref(&buf_vk->rc); bool needs_flush = buf_vk->needs_flush || buf->params.host_mapped || buf->params.import_handle == PL_HANDLE_HOST_PTR; bool noncoherent = buf_vk->mem.data && !buf_vk->mem.coherent; if (needs_flush && noncoherent) { buf_vk->needs_flush = false; VK(vk->FlushMappedMemoryRanges(vk->dev, 1, &(struct VkMappedMemoryRange) { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = buf_vk->mem.vkmem, .offset = buf_vk->mem.offset, .size = buf_vk->mem.size, })); // Just ignore errors, not much we can do about them other than // logging them and moving on... error: ; } struct vk_sync_scope last; last = vk_sem_barrier(vk, cmd, &buf_vk->sem, stage, access, export); // CONCURRENT buffers require transitioning to/from IGNORED, EXCLUSIVE // buffers require transitioning to/from the concrete QF index uint32_t qf = vk->pools.num > 1 ? VK_QUEUE_FAMILY_IGNORED : cmd->pool->qf; VkBufferMemoryBarrier barr = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcQueueFamilyIndex = buf_vk->exported ? VK_QUEUE_FAMILY_EXTERNAL_KHR : qf, .dstQueueFamilyIndex = export ? 
VK_QUEUE_FAMILY_EXTERNAL_KHR : qf, .srcAccessMask = last.access, .dstAccessMask = access, .buffer = buf_vk->mem.buf, .offset = buf_vk->mem.offset + offset, .size = size, }; if (last.access || barr.srcQueueFamilyIndex != barr.dstQueueFamilyIndex) { vk->CmdPipelineBarrier(cmd->buf, last.stage, stage, 0, 0, NULL, 1, &barr, 0, NULL); } buf_vk->exported = export; vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, gpu, buf); } void vk_buf_deref(pl_gpu gpu, pl_buf buf) { if (!buf) return; struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); if (pl_rc_deref(&buf_vk->rc)) { vk_sem_uninit(vk, &buf_vk->sem); vk->DestroyBufferView(vk->dev, buf_vk->view, PL_VK_ALLOC); vk_malloc_free(vk->ma, &buf_vk->mem); pl_free((void *) buf); } } pl_buf vk_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_vk); buf->params = *params; buf->params.initial_data = NULL; struct pl_buf_vk *buf_vk = PL_PRIV(buf); pl_rc_init(&buf_vk->rc); if (!vk_sem_init(vk, &buf_vk->sem, PL_DEF(params->debug_tag, "vk_buf"))) goto error; struct vk_malloc_params mparams = { .reqs = { .size = PL_ALIGN2(params->size, 4), // for vk_buf_write .memoryTypeBits = UINT32_MAX, .alignment = 1, }, // these are always set, because `vk_buf_copy` can always be used .buf_usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, .export_handle = params->export_handle, .import_handle = params->import_handle, .shared_mem = params->shared_mem, }; // Mandatory/optimal buffer offset alignment VkDeviceSize *align = &mparams.reqs.alignment; VkDeviceSize extra_align = vk->limits.optimalBufferCopyOffsetAlignment; // Try and align all buffers to the minimum texel alignment, to make sure // tex_upload/tex_download always gets aligned buffer copies if possible extra_align = pl_lcm(extra_align, p->min_texel_alignment); enum pl_buf_mem_type mem_type = params->memory_type; bool is_texel = false; if (params->uniform) { mparams.buf_usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; *align = pl_lcm(*align, vk->limits.minUniformBufferOffsetAlignment); mem_type = PL_BUF_MEM_DEVICE; if (params->format) { mparams.buf_usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; is_texel = true; } } if (params->storable) { mparams.buf_usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; *align = pl_lcm(*align, vk->limits.minStorageBufferOffsetAlignment); buf_vk->update_queue = vk->pool_compute ? 
COMPUTE : GRAPHICS; mem_type = PL_BUF_MEM_DEVICE; if (params->format) { mparams.buf_usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; is_texel = true; } } if (is_texel) { *align = pl_lcm(*align, vk->limits.minTexelBufferOffsetAlignment); *align = pl_lcm(*align, params->format->texel_size); } if (params->drawable) { mparams.buf_usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; mem_type = PL_BUF_MEM_DEVICE; } if (params->host_writable || params->initial_data) { // Buffers should be written using mapped memory if possible mparams.optimal = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; // Use the transfer queue for updates on very large buffers (1 MB) if (params->size > 1024*1024) buf_vk->update_queue = TRANSFER; } if (params->host_mapped || params->host_readable) { mparams.required |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; if (params->size > 1024) { // Require cached memory for large buffers (1 kB) which may be read // from, because uncached reads are extremely slow mparams.required |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; } } if (params->host_writable || params->host_readable) { // Prefer buffers requiring frequent host operations in host mem mem_type = PL_DEF(mem_type, PL_BUF_MEM_HOST); } switch (mem_type) { case PL_BUF_MEM_AUTO: // We generally prefer VRAM since it's faster than RAM, but any number // of other requirements could potentially exclude it, so just mark it // as optimal by default. mparams.optimal |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; break; case PL_BUF_MEM_DEVICE: // Force device local memory. mparams.required |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; break; case PL_BUF_MEM_HOST: // This isn't a true guarantee, but actually trying to restrict the // device-local bit locks out all memory heaps on iGPUs. Requiring // the memory be host-mapped is the easiest compromise. 
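        // (On a typical discrete GPU the host-visible heaps are not
        //  device-local, so this effectively steers the allocation into
        //  system RAM; on iGPUs, where heaps are usually both device-local
        //  and host-visible, it leaves every heap usable.)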
mparams.required |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; break; case PL_BUF_MEM_TYPE_COUNT: pl_unreachable(); } if (params->import_handle) { size_t offset = params->shared_mem.offset; if (PL_ALIGN(offset, *align) != offset) { PL_ERR(gpu, "Imported memory offset %zu violates minimum alignment " "requirement of enabled usage flags (%zu)!", offset, (size_t) *align); goto error; } } else { *align = pl_lcm(*align, extra_align); } if (!vk_malloc_slice(vk->ma, &buf_vk->mem, &mparams)) goto error; if (params->host_mapped) buf->data = buf_vk->mem.data; if (params->export_handle) { buf->shared_mem = buf_vk->mem.shared_mem; buf->shared_mem.drm_format_mod = DRM_FORMAT_MOD_LINEAR; buf_vk->exported = true; } if (is_texel) { struct pl_fmt_vk *fmtp = PL_PRIV(params->format); VkBufferViewCreateInfo vinfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, .buffer = buf_vk->mem.buf, .format = PL_DEF(fmtp->vk_fmt->bfmt, fmtp->vk_fmt->tfmt), .offset = buf_vk->mem.offset, .range = buf_vk->mem.size, }; VK(vk->CreateBufferView(vk->dev, &vinfo, PL_VK_ALLOC, &buf_vk->view)); PL_VK_NAME(BUFFER_VIEW, buf_vk->view, PL_DEF(params->debug_tag, "texel")); } if (params->initial_data) vk_buf_write(gpu, buf, 0, params->initial_data, params->size); return buf; error: vk_buf_deref(gpu, buf); return NULL; } static void invalidate_memslice(struct vk_ctx *vk, const struct vk_memslice *mem) { VK(vk->InvalidateMappedMemoryRanges(vk->dev, 1, &(VkMappedMemoryRange) { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = mem->vkmem, .offset = mem->offset, .size = mem->size, })); // Ignore errors (after logging), nothing useful we can do anyway error: ; } void vk_buf_flush(pl_gpu gpu, struct vk_cmd *cmd, pl_buf buf, size_t offset, size_t size) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); // We need to perform a flush if the host is capable of reading back from // the buffer, or if we intend to overwrite it using mapped memory bool can_read = buf->params.host_readable; bool can_write = buf_vk->mem.data && buf->params.host_writable; if (buf->params.host_mapped || buf->params.import_handle == PL_HANDLE_HOST_PTR) can_read = can_write = true; if (!can_read && !can_write) return; VkBufferMemoryBarrier buffBarrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcAccessMask = buf_vk->sem.write.access, .dstAccessMask = (can_read ? VK_ACCESS_HOST_READ_BIT : 0) | (can_write ? 
VK_ACCESS_HOST_WRITE_BIT : 0), .buffer = buf_vk->mem.buf, .offset = buf_vk->mem.offset + offset, .size = size, }; vk->CmdPipelineBarrier(cmd->buf, buf_vk->sem.write.stage, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1, &buffBarrier, 0, NULL); // Invalidate the mapped memory as soon as this barrier completes if (buf_vk->mem.data && !buf_vk->mem.coherent) vk_cmd_callback(cmd, (vk_cb) invalidate_memslice, vk, &buf_vk->mem); } bool vk_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); // Opportunistically check if we can re-use this buffer without flush vk_poll_commands(vk, 0); if (pl_rc_count(&buf_vk->rc) == 1) return false; // Otherwise, we're force to submit any queued command so that the // user is guaranteed to see progress eventually, even if they call // this in a tight loop CMD_SUBMIT(NULL); vk_poll_commands(vk, timeout); return pl_rc_count(&buf_vk->rc) > 1; } void vk_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data, size_t size) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); // For host-mapped buffers, we can just directly memcpy the buffer contents. // Otherwise, we can update the buffer from the GPU using a command buffer. if (buf_vk->mem.data) { // ensure no queued operations while (vk_buf_poll(gpu, buf, UINT64_MAX)) ; // do nothing uintptr_t addr = (uintptr_t) buf_vk->mem.data + offset; memcpy((void *) addr, data, size); buf_vk->needs_flush = true; } else { struct vk_cmd *cmd = CMD_BEGIN(buf_vk->update_queue); if (!cmd) { PL_ERR(gpu, "Failed updating buffer!"); return; } vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, offset, size, false); // Vulkan requires `size` to be a multiple of 4, so we need to make // sure to handle the end separately if the original data is not const size_t max_transfer = 64 * 1024; size_t size_rem = size % 4; size_t size_base = size - size_rem; VkDeviceSize buf_offset = buf_vk->mem.offset + offset; if (size_base > max_transfer) { PL_TRACE(gpu, "Using multiple vkCmdUpdateBuffer calls to upload " "large buffer. 
Consider using buffer-buffer transfers " "instead!"); } for (size_t xfer = 0; xfer < size_base; xfer += max_transfer) { vk->CmdUpdateBuffer(cmd->buf, buf_vk->mem.buf, buf_offset + xfer, PL_MIN(size_base, max_transfer), (void *) ((uint8_t *) data + xfer)); } if (size_rem) { uint8_t tail[4] = {0}; memcpy(tail, data, size_rem); vk->CmdUpdateBuffer(cmd->buf, buf_vk->mem.buf, buf_offset + size_base, sizeof(tail), tail); } pl_assert(!buf->params.host_readable); // no flush needed due to this CMD_FINISH(&cmd); } } bool vk_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest, size_t size) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); pl_assert(buf_vk->mem.data); if (vk_buf_poll(gpu, buf, 0)) { // ensure no more queued writes VK(vk->WaitSemaphoresKHR(vk->dev, &(VkSemaphoreWaitInfo) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, .semaphoreCount = 1, .pSemaphores = &buf_vk->sem.semaphore, .pValues = &buf_vk->sem.write.value, }, UINT64_MAX)); // process callbacks vk_poll_commands(vk, 0); } uintptr_t addr = (uintptr_t) buf_vk->mem.data + (size_t) offset; memcpy(dest, (void *) addr, size); return true; error: return false; } void vk_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *dst_vk = PL_PRIV(dst); struct pl_buf_vk *src_vk = PL_PRIV(src); struct vk_cmd *cmd = CMD_BEGIN(dst_vk->update_queue); if (!cmd) { PL_ERR(gpu, "Failed copying buffer!"); return; } vk_buf_barrier(gpu, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, dst_offset, size, false); vk_buf_barrier(gpu, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, src_offset, size, false); VkBufferCopy region = { .srcOffset = src_vk->mem.offset + src_offset, .dstOffset = dst_vk->mem.offset + dst_offset, .size = size, }; vk->CmdCopyBuffer(cmd->buf, src_vk->mem.buf, dst_vk->mem.buf, 1, ®ion); vk_buf_flush(gpu, cmd, dst, dst_offset, size); CMD_FINISH(&cmd); } bool vk_buf_export(pl_gpu gpu, pl_buf buf) { struct pl_buf_vk *buf_vk = PL_PRIV(buf); if (buf_vk->exported) return true; struct vk_cmd *cmd = CMD_BEGIN(ANY); if (!cmd) { PL_ERR(gpu, "Failed exporting buffer!"); return false; } // For the queue family ownership transfer, we can ignore all pipeline // stages since the synchronization via fences/semaphores is required vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, buf->params.size, true); return CMD_SUBMIT(&cmd); } libplacebo-v4.192.1/src/vulkan/gpu_pass.c000066400000000000000000001111651417677245700202570ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" #include "glsl/spirv.h" // For pl_pass.priv struct pl_pass_vk { // Pipeline / render pass VkPipeline base; VkPipeline pipe; VkPipelineLayout pipeLayout; VkRenderPass renderPass; VkImageLayout initialLayout; // Descriptor set (bindings) bool use_pushd; VkDescriptorSetLayout dsLayout; VkDescriptorPool dsPool; // To keep track of which descriptor sets are and aren't available, we // allocate a fixed number and use a bitmask of all available sets. VkDescriptorSet dss[16]; uint16_t dmask; // For recompilation VkVertexInputAttributeDescription *attrs; VkPipelineCache cache; VkShaderModule vert; VkShaderModule shader; // For updating VkWriteDescriptorSet *dswrite; VkDescriptorImageInfo *dsiinfo; VkDescriptorBufferInfo *dsbinfo; VkSpecializationInfo specInfo; size_t spec_size; }; int vk_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { return 0; } static void pass_destroy_cb(pl_gpu gpu, pl_pass pass) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_pass_vk *pass_vk = PL_PRIV(pass); vk->DestroyPipeline(vk->dev, pass_vk->pipe, PL_VK_ALLOC); vk->DestroyPipeline(vk->dev, pass_vk->base, PL_VK_ALLOC); vk->DestroyRenderPass(vk->dev, pass_vk->renderPass, PL_VK_ALLOC); vk->DestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, PL_VK_ALLOC); vk->DestroyPipelineCache(vk->dev, pass_vk->cache, PL_VK_ALLOC); vk->DestroyDescriptorPool(vk->dev, pass_vk->dsPool, PL_VK_ALLOC); vk->DestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, PL_VK_ALLOC); vk->DestroyShaderModule(vk->dev, pass_vk->vert, PL_VK_ALLOC); vk->DestroyShaderModule(vk->dev, pass_vk->shader, PL_VK_ALLOC); pl_free((void *) pass); } void vk_pass_destroy(pl_gpu gpu, pl_pass pass) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_mutex_lock(&p->recording); if (p->cmd) { vk_cmd_callback(p->cmd, (vk_cb) pass_destroy_cb, gpu, pass); } else { vk_dev_callback(vk, (vk_cb) pass_destroy_cb, gpu, pass); } pl_mutex_unlock(&p->recording); } static const VkDescriptorType dsType[] = { [PL_DESC_SAMPLED_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, [PL_DESC_STORAGE_IMG] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, [PL_DESC_BUF_UNIFORM] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, [PL_DESC_BUF_STORAGE] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, [PL_DESC_BUF_TEXEL_UNIFORM] = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, [PL_DESC_BUF_TEXEL_STORAGE] = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, }; #define CACHE_MAGIC {'P','L','V','K'} #define CACHE_VERSION 4 static const char vk_cache_magic[4] = CACHE_MAGIC; struct vk_cache_header { char magic[sizeof(vk_cache_magic)]; int cache_version; uint64_t signature; size_t vert_spirv_len; size_t frag_spirv_len; size_t comp_spirv_len; size_t pipecache_len; }; static uint64_t cache_signature(pl_gpu gpu, const struct pl_pass_params *params) { struct pl_vk *p = PL_PRIV(gpu); uint64_t sig = p->spirv->signature; pl_hash_merge(&sig, pl_str0_hash(params->glsl_shader)); if (params->type == PL_PASS_RASTER) pl_hash_merge(&sig, pl_str0_hash(params->vertex_shader)); return sig; } static bool vk_use_cached_program(const struct pl_pass_params *params, const struct spirv_compiler *spirv, pl_str *vert_spirv, pl_str *frag_spirv, pl_str *comp_spirv, pl_str *pipecache, uint64_t signature) { pl_str cache = { .buf = (uint8_t *) params->cached_program, .len = params->cached_program_len, }; if (cache.len < sizeof(struct vk_cache_header)) return false; struct vk_cache_header *header = (struct vk_cache_header *) cache.buf; cache = pl_str_drop(cache, sizeof(*header)); if (strncmp(header->magic, vk_cache_magic, 
sizeof(vk_cache_magic)) != 0) return false; if (header->cache_version != CACHE_VERSION) return false; if (header->signature != signature) return false; #define GET(ptr) \ if (cache.len < header->ptr##_len) \ return false; \ *ptr = pl_str_take(cache, header->ptr##_len); \ cache = pl_str_drop(cache, ptr->len); GET(vert_spirv); GET(frag_spirv); GET(comp_spirv); GET(pipecache); return true; } static VkResult vk_compile_glsl(pl_gpu gpu, void *alloc, enum glsl_shader_stage stage, const char *shader, pl_str *out_spirv) { struct pl_vk *p = PL_PRIV(gpu); static const char *shader_names[] = { [GLSL_SHADER_VERTEX] = "vertex", [GLSL_SHADER_FRAGMENT] = "fragment", [GLSL_SHADER_COMPUTE] = "compute", }; PL_DEBUG(gpu, "%s shader source:", shader_names[stage]); pl_msg_source(gpu->log, PL_LOG_DEBUG, shader); clock_t start = clock(); *out_spirv = spirv_compile_glsl(p->spirv, alloc, &gpu->glsl, stage, shader); if (!out_spirv->len) { pl_msg_source(gpu->log, PL_LOG_ERR, shader); return VK_ERROR_INITIALIZATION_FAILED; } pl_log_cpu_time(gpu->log, start, clock(), "translating SPIR-V"); return VK_SUCCESS; } static const VkShaderStageFlags stageFlags[] = { [PL_PASS_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT, [PL_PASS_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT, }; static void destroy_pipeline(struct vk_ctx *vk, VkPipeline pipeline) { vk->DestroyPipeline(vk->dev, pipeline, PL_VK_ALLOC); } static VkResult vk_recreate_pipelines(struct vk_ctx *vk, pl_pass pass, bool derivable, VkPipeline base, VkPipeline *out_pipe) { struct pl_pass_vk *pass_vk = PL_PRIV(pass); const struct pl_pass_params *params = &pass->params; // The old pipeline might still be in use, so we have to destroy it // asynchronously with a device idle callback if (*out_pipe) { vk_dev_callback(vk, (vk_cb) destroy_pipeline, vk, *out_pipe); *out_pipe = NULL; } VkPipelineCreateFlags flags = 0; if (derivable) flags |= VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT; if (base) flags |= VK_PIPELINE_CREATE_DERIVATIVE_BIT; const VkSpecializationInfo *specInfo = &pass_vk->specInfo; if (!specInfo->dataSize) specInfo = NULL; switch (params->type) { case PL_PASS_RASTER: { static const VkBlendFactor blendFactors[] = { [PL_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO, [PL_BLEND_ONE] = VK_BLEND_FACTOR_ONE, [PL_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA, [PL_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, }; VkPipelineColorBlendAttachmentState blendState = { .colorBlendOp = VK_BLEND_OP_ADD, .alphaBlendOp = VK_BLEND_OP_ADD, .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, }; const struct pl_blend_params *blend = params->blend_params; if (blend) { blendState.blendEnable = true; blendState.srcColorBlendFactor = blendFactors[blend->src_rgb]; blendState.dstColorBlendFactor = blendFactors[blend->dst_rgb]; blendState.srcAlphaBlendFactor = blendFactors[blend->src_alpha]; blendState.dstAlphaBlendFactor = blendFactors[blend->dst_alpha]; } static const VkPrimitiveTopology topologies[PL_PRIM_TYPE_COUNT] = { [PL_PRIM_TRIANGLE_LIST] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, [PL_PRIM_TRIANGLE_STRIP] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, }; VkGraphicsPipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .flags = flags, .stageCount = 2, .pStages = (VkPipelineShaderStageCreateInfo[]) { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_VERTEX_BIT, .module = pass_vk->vert, .pName = "main", }, { .sType = 
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT_BIT, .module = pass_vk->shader, .pName = "main", .pSpecializationInfo = specInfo, } }, .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 1, .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) { .binding = 0, .stride = params->vertex_stride, .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, }, .vertexAttributeDescriptionCount = params->num_vertex_attribs, .pVertexAttributeDescriptions = pass_vk->attrs, }, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = topologies[params->vertex_type], }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, .viewportCount = 1, .scissorCount = 1, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .polygonMode = VK_POLYGON_MODE_FILL, .cullMode = VK_CULL_MODE_NONE, .lineWidth = 1.0f, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, }, .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, .pAttachments = &blendState, }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .dynamicStateCount = 2, .pDynamicStates = (VkDynamicState[]){ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, }, }, .layout = pass_vk->pipeLayout, .renderPass = pass_vk->renderPass, .basePipelineHandle = base, .basePipelineIndex = -1, }; return vk->CreateGraphicsPipelines(vk->dev, pass_vk->cache, 1, &cinfo, PL_VK_ALLOC, out_pipe); } case PL_PASS_COMPUTE: { VkComputePipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .flags = flags, .stage = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = pass_vk->shader, .pName = "main", .pSpecializationInfo = specInfo, }, .layout = pass_vk->pipeLayout, .basePipelineHandle = base, .basePipelineIndex = -1, }; return vk->CreateComputePipelines(vk->dev, pass_vk->cache, 1, &cinfo, PL_VK_ALLOC, out_pipe); } case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: break; } pl_unreachable(); } pl_pass vk_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; bool success = false; struct pl_pass *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_vk); pass->params = pl_pass_params_copy(pass, params); struct pl_pass_vk *pass_vk = PL_PRIV(pass); pass_vk->dmask = -1; // all descriptors available // temporary allocations void *tmp = pl_tmp(NULL); int num_desc = params->num_descriptors; if (!num_desc) goto no_descriptors; if (num_desc > vk->limits.maxPerStageResources) { PL_ERR(gpu, "Pass with %d descriptors exceeds the maximum number of " "per-stage resources %" PRIu32"!", num_desc, vk->limits.maxPerStageResources); goto error; } pass_vk->dswrite = pl_calloc(pass, num_desc, sizeof(VkWriteDescriptorSet)); pass_vk->dsiinfo = pl_calloc(pass, num_desc, sizeof(VkDescriptorImageInfo)); pass_vk->dsbinfo = pl_calloc(pass, num_desc, sizeof(VkDescriptorBufferInfo)); #define NUM_DS 
(PL_ARRAY_SIZE(pass_vk->dss)) static int dsSize[PL_DESC_TYPE_COUNT] = {0}; VkDescriptorSetLayoutBinding *bindings = pl_calloc_ptr(tmp, num_desc, bindings); uint32_t max_tex = vk->limits.maxPerStageDescriptorSampledImages, max_img = vk->limits.maxPerStageDescriptorStorageImages, max_ubo = vk->limits.maxPerStageDescriptorUniformBuffers, max_ssbo = vk->limits.maxPerStageDescriptorStorageBuffers; uint32_t *dsLimits[PL_DESC_TYPE_COUNT] = { [PL_DESC_SAMPLED_TEX] = &max_tex, [PL_DESC_STORAGE_IMG] = &max_img, [PL_DESC_BUF_UNIFORM] = &max_ubo, [PL_DESC_BUF_STORAGE] = &max_ssbo, [PL_DESC_BUF_TEXEL_UNIFORM] = &max_tex, [PL_DESC_BUF_TEXEL_STORAGE] = &max_img, }; for (int i = 0; i < num_desc; i++) { struct pl_desc *desc = ¶ms->descriptors[i]; if (!(*dsLimits[desc->type])--) { PL_ERR(gpu, "Pass exceeds the maximum number of per-stage " "descriptors of type %u!", (unsigned) desc->type); goto error; } dsSize[desc->type]++; bindings[i] = (VkDescriptorSetLayoutBinding) { .binding = desc->binding, .descriptorType = dsType[desc->type], .descriptorCount = 1, .stageFlags = stageFlags[params->type], }; } VkDescriptorSetLayoutCreateInfo dinfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pBindings = bindings, .bindingCount = num_desc, }; if (p->max_push_descriptors && num_desc <= p->max_push_descriptors) { dinfo.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; pass_vk->use_pushd = true; } else if (p->max_push_descriptors) { PL_INFO(gpu, "Pass with %d descriptors exceeds the maximum push " "descriptor count (%d). Falling back to descriptor sets!", num_desc, p->max_push_descriptors); } VK(vk->CreateDescriptorSetLayout(vk->dev, &dinfo, PL_VK_ALLOC, &pass_vk->dsLayout)); if (!pass_vk->use_pushd) { PL_ARRAY(VkDescriptorPoolSize) dsPoolSizes = {0}; for (enum pl_desc_type t = 0; t < PL_DESC_TYPE_COUNT; t++) { if (dsSize[t] > 0) { PL_ARRAY_APPEND(tmp, dsPoolSizes, (VkDescriptorPoolSize) { .type = dsType[t], .descriptorCount = dsSize[t] * NUM_DS, }); } } if (dsPoolSizes.num) { VkDescriptorPoolCreateInfo pinfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .maxSets = NUM_DS, .pPoolSizes = dsPoolSizes.elem, .poolSizeCount = dsPoolSizes.num, }; VK(vk->CreateDescriptorPool(vk->dev, &pinfo, PL_VK_ALLOC, &pass_vk->dsPool)); VkDescriptorSetLayout layouts[NUM_DS]; for (int i = 0; i < NUM_DS; i++) layouts[i] = pass_vk->dsLayout; VkDescriptorSetAllocateInfo ainfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = pass_vk->dsPool, .descriptorSetCount = NUM_DS, .pSetLayouts = layouts, }; VK(vk->AllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss)); } } no_descriptors: ; bool has_spec = params->num_constants; if (has_spec) { PL_ARRAY(VkSpecializationMapEntry) entries = {0}; PL_ARRAY_RESIZE(pass, entries, params->num_constants); size_t spec_size = 0; for (int i = 0; i < params->num_constants; i++) { const struct pl_constant *con = ¶ms->constants[i]; size_t con_size = pl_var_type_size(con->type); entries.elem[i] = (VkSpecializationMapEntry) { .constantID = con->id, .offset = con->offset, .size = con_size, }; size_t req_size = con->offset + con_size; spec_size = PL_MAX(spec_size, req_size); } pass_vk->spec_size = spec_size; pass_vk->specInfo = (VkSpecializationInfo) { .mapEntryCount = params->num_constants, .pMapEntries = entries.elem, }; if (params->constant_data) { pass_vk->specInfo.pData = pl_memdup(pass, params->constant_data, spec_size); pass_vk->specInfo.dataSize = spec_size; } } VkPipelineLayoutCreateInfo linfo = { .sType = 
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = num_desc ? 1 : 0, .pSetLayouts = &pass_vk->dsLayout, .pushConstantRangeCount = params->push_constants_size ? 1 : 0, .pPushConstantRanges = &(VkPushConstantRange){ .stageFlags = stageFlags[params->type], .offset = 0, .size = params->push_constants_size, }, }; VK(vk->CreatePipelineLayout(vk->dev, &linfo, PL_VK_ALLOC, &pass_vk->pipeLayout)); pl_str vert = {0}, frag = {0}, comp = {0}, pipecache = {0}; uint64_t sig = cache_signature(gpu, params); if (vk_use_cached_program(params, p->spirv, &vert, &frag, &comp, &pipecache, sig)) { PL_DEBUG(gpu, "Using cached SPIR-V and VkPipeline"); } else { pipecache.len = 0; switch (params->type) { case PL_PASS_RASTER: VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_VERTEX, params->vertex_shader, &vert)); VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_FRAGMENT, params->glsl_shader, &frag)); comp.len = 0; break; case PL_PASS_COMPUTE: VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_COMPUTE, params->glsl_shader, &comp)); frag.len = 0; vert.len = 0; break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } } VkPipelineCacheCreateInfo pcinfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, .pInitialData = pipecache.buf, .initialDataSize = pipecache.len, }; VK(vk->CreatePipelineCache(vk->dev, &pcinfo, PL_VK_ALLOC, &pass_vk->cache)); VkShaderModuleCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, }; clock_t start = clock(); switch (params->type) { case PL_PASS_RASTER: { sinfo.pCode = (uint32_t *) vert.buf; sinfo.codeSize = vert.len; VK(vk->CreateShaderModule(vk->dev, &sinfo, PL_VK_ALLOC, &pass_vk->vert)); PL_VK_NAME(SHADER_MODULE, pass_vk->vert, "vertex"); sinfo.pCode = (uint32_t *) frag.buf; sinfo.codeSize = frag.len; VK(vk->CreateShaderModule(vk->dev, &sinfo, PL_VK_ALLOC, &pass_vk->shader)); PL_VK_NAME(SHADER_MODULE, pass_vk->shader, "fragment"); pass_vk->attrs = pl_calloc_ptr(pass, params->num_vertex_attribs, pass_vk->attrs); for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_vertex_attrib *va = ¶ms->vertex_attribs[i]; const struct vk_format **pfmt_vk = PL_PRIV(va->fmt); pass_vk->attrs[i] = (VkVertexInputAttributeDescription) { .binding = 0, .location = va->location, .offset = va->offset, .format = PL_DEF((*pfmt_vk)->bfmt, (*pfmt_vk)->tfmt), }; } VkAttachmentLoadOp loadOp; if (pass->params.load_target) { pass_vk->initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } else { pass_vk->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; } VkRenderPassCreateInfo rinfo = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .format = (VkFormat) params->target_format->signature, .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = loadOp, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = pass_vk->initialLayout, .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, }, .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .colorAttachmentCount = 1, .pColorAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, }, }, }; VK(vk->CreateRenderPass(vk->dev, &rinfo, PL_VK_ALLOC, &pass_vk->renderPass)); break; } case PL_PASS_COMPUTE: { sinfo.pCode = (uint32_t *) comp.buf; sinfo.codeSize = comp.len; VK(vk->CreateShaderModule(vk->dev, &sinfo, PL_VK_ALLOC, &pass_vk->shader)); PL_VK_NAME(SHADER_MODULE, pass_vk->shader, 
"compute"); break; } case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } clock_t after_compilation = clock(); pl_log_cpu_time(gpu->log, start, after_compilation, "compiling shader"); // Create the graphics/compute pipeline VkPipeline *pipe = has_spec ? &pass_vk->base : &pass_vk->pipe; VK(vk_recreate_pipelines(vk, pass, has_spec, NULL, pipe)); pl_log_cpu_time(gpu->log, after_compilation, clock(), "creating pipeline"); if (!has_spec) { // We can free these if we no longer need them for specialization pl_free_ptr(&pass_vk->attrs); vk->DestroyShaderModule(vk->dev, pass_vk->vert, PL_VK_ALLOC); vk->DestroyShaderModule(vk->dev, pass_vk->shader, PL_VK_ALLOC); pass_vk->vert = VK_NULL_HANDLE; pass_vk->shader = VK_NULL_HANDLE; } // Update params->cached_program pl_str cache = {0}; VK(vk->GetPipelineCacheData(vk->dev, pass_vk->cache, &cache.len, NULL)); cache.buf = pl_alloc(tmp, cache.len); VK(vk->GetPipelineCacheData(vk->dev, pass_vk->cache, &cache.len, cache.buf)); if (!has_spec) { vk->DestroyPipelineCache(vk->dev, pass_vk->cache, PL_VK_ALLOC); pass_vk->cache = VK_NULL_HANDLE; } struct vk_cache_header header = { .magic = CACHE_MAGIC, .cache_version = CACHE_VERSION, .signature = sig, .vert_spirv_len = vert.len, .frag_spirv_len = frag.len, .comp_spirv_len = comp.len, .pipecache_len = cache.len, }; PL_DEBUG(vk, "Pass statistics: size %zu, SPIR-V: vert %zu frag %zu comp %zu", cache.len, vert.len, frag.len, comp.len); pl_str prog = {0}; pl_str_append(pass, &prog, (pl_str){ (uint8_t *) &header, sizeof(header) }); pl_str_append(pass, &prog, vert); pl_str_append(pass, &prog, frag); pl_str_append(pass, &prog, comp); pl_str_append(pass, &prog, cache); pass->params.cached_program = prog.buf; pass->params.cached_program_len = prog.len; success = true; error: if (!success) { pass_destroy_cb(gpu, pass); pass = NULL; } #undef NUM_DS pl_free(tmp); return pass; } static const VkPipelineStageFlags passStages[] = { [PL_PASS_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, [PL_PASS_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, }; static void vk_update_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass, struct pl_desc_binding db, VkDescriptorSet ds, int idx) { struct pl_vk *p = PL_PRIV(gpu); struct pl_pass_vk *pass_vk = PL_PRIV(pass); struct pl_desc *desc = &pass->params.descriptors[idx]; VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx]; *wds = (VkWriteDescriptorSet) { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstSet = ds, .dstBinding = desc->binding, .descriptorCount = 1, .descriptorType = dsType[desc->type], }; static const VkAccessFlags access[PL_DESC_ACCESS_COUNT] = { [PL_DESC_ACCESS_READONLY] = VK_ACCESS_SHADER_READ_BIT, [PL_DESC_ACCESS_WRITEONLY] = VK_ACCESS_SHADER_WRITE_BIT, [PL_DESC_ACCESS_READWRITE] = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, }; switch (desc->type) { case PL_DESC_SAMPLED_TEX: { pl_tex tex = db.object; struct pl_tex_vk *tex_vk = PL_PRIV(tex); vk_tex_barrier(gpu, cmd, tex, passStages[pass->params.type], VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false); VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx]; *iinfo = (VkDescriptorImageInfo) { .sampler = p->samplers[db.sample_mode][db.address_mode], .imageView = tex_vk->view, .imageLayout = tex_vk->layout, }; wds->pImageInfo = iinfo; return; } case PL_DESC_STORAGE_IMG: { pl_tex tex = db.object; struct pl_tex_vk *tex_vk = PL_PRIV(tex); vk_tex_barrier(gpu, cmd, tex, passStages[pass->params.type], access[desc->access], VK_IMAGE_LAYOUT_GENERAL, 
false); VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx]; *iinfo = (VkDescriptorImageInfo) { .imageView = tex_vk->view, .imageLayout = tex_vk->layout, }; wds->pImageInfo = iinfo; return; } case PL_DESC_BUF_UNIFORM: case PL_DESC_BUF_STORAGE: { pl_buf buf = db.object; struct pl_buf_vk *buf_vk = PL_PRIV(buf); vk_buf_barrier(gpu, cmd, buf, passStages[pass->params.type], access[desc->access], 0, buf->params.size, false); VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx]; *binfo = (VkDescriptorBufferInfo) { .buffer = buf_vk->mem.buf, .offset = buf_vk->mem.offset, .range = buf->params.size, }; wds->pBufferInfo = binfo; return; } case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = db.object; struct pl_buf_vk *buf_vk = PL_PRIV(buf); vk_buf_barrier(gpu, cmd, buf, passStages[pass->params.type], access[desc->access], 0, buf->params.size, false); wds->pTexelBufferView = &buf_vk->view; return; } case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: break; } pl_unreachable(); } static void vk_release_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass, struct pl_desc_binding db, int idx) { const struct pl_desc *desc = &pass->params.descriptors[idx]; switch (desc->type) { case PL_DESC_BUF_UNIFORM: case PL_DESC_BUF_STORAGE: case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: if (desc->access != PL_DESC_ACCESS_READONLY) { pl_buf buf = db.object; vk_buf_flush(gpu, cmd, buf, 0, buf->params.size); } return; case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: return; case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: break; } pl_unreachable(); } static void set_ds(struct pl_pass_vk *pass_vk, void *dsbit) { pass_vk->dmask |= (uintptr_t) dsbit; } static bool need_respec(pl_pass pass, const struct pl_pass_run_params *params) { struct pl_pass_vk *pass_vk = PL_PRIV(pass); if (!pass_vk->spec_size || !params->constant_data) return false; VkSpecializationInfo *specInfo = &pass_vk->specInfo; size_t size = pass_vk->spec_size; if (!specInfo->pData) { // Shader was never specialized before specInfo->pData = pl_memdup((void *) pass, params->constant_data, size); specInfo->dataSize = size; return true; } // Shader is being re-specialized with new values if (memcmp(specInfo->pData, params->constant_data, size) != 0) { memcpy((void *) specInfo->pData, params->constant_data, size); return true; } return false; } void vk_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_pass pass = params->pass; struct pl_pass_vk *pass_vk = PL_PRIV(pass); if (params->vertex_data || params->index_data) return pl_pass_run_vbo(gpu, params); // Check if we need to re-specialize this pipeline if (need_respec(pass, params)) { clock_t start = clock(); VK(vk_recreate_pipelines(vk, pass, false, pass_vk->base, &pass_vk->pipe)); pl_log_cpu_time(gpu->log, start, clock(), "re-specializing shader"); } if (!pass_vk->use_pushd) { // Wait for a free descriptor set while (!pass_vk->dmask) { PL_TRACE(gpu, "No free descriptor sets! 
...blocking (slow path)"); vk_poll_commands(vk, 10000000); // 10 ms } } static const enum queue_type types[] = { [PL_PASS_RASTER] = GRAPHICS, [PL_PASS_COMPUTE] = COMPUTE, }; struct vk_cmd *cmd = CMD_BEGIN_TIMED(types[pass->params.type], params->timer); if (!cmd) goto error; // Find a descriptor set to use VkDescriptorSet ds = VK_NULL_HANDLE; if (!pass_vk->use_pushd) { for (int i = 0; i < PL_ARRAY_SIZE(pass_vk->dss); i++) { uint16_t dsbit = 1u << i; if (pass_vk->dmask & dsbit) { ds = pass_vk->dss[i]; pass_vk->dmask &= ~dsbit; // unset vk_cmd_callback(cmd, (vk_cb) set_ds, pass_vk, (void *)(uintptr_t) dsbit); break; } } } // Update the dswrite structure with all of the new values for (int i = 0; i < pass->params.num_descriptors; i++) vk_update_descriptor(gpu, cmd, pass, params->desc_bindings[i], ds, i); if (!pass_vk->use_pushd) { vk->UpdateDescriptorSets(vk->dev, pass->params.num_descriptors, pass_vk->dswrite, 0, NULL); } // Bind the pipeline, descriptor set, etc. static const VkPipelineBindPoint bindPoint[] = { [PL_PASS_RASTER] = VK_PIPELINE_BIND_POINT_GRAPHICS, [PL_PASS_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE, }; vk->CmdBindPipeline(cmd->buf, bindPoint[pass->params.type], PL_DEF(pass_vk->pipe, pass_vk->base)); if (ds) { vk->CmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type], pass_vk->pipeLayout, 0, 1, &ds, 0, NULL); } if (pass_vk->use_pushd) { vk->CmdPushDescriptorSetKHR(cmd->buf, bindPoint[pass->params.type], pass_vk->pipeLayout, 0, pass->params.num_descriptors, pass_vk->dswrite); } if (pass->params.push_constants_size) { vk->CmdPushConstants(cmd->buf, pass_vk->pipeLayout, stageFlags[pass->params.type], 0, pass->params.push_constants_size, params->push_constants); } switch (pass->params.type) { case PL_PASS_RASTER: { pl_tex tex = params->target; struct pl_tex_vk *tex_vk = PL_PRIV(tex); pl_buf vert = params->vertex_buf; struct pl_buf_vk *vert_vk = PL_PRIV(vert); pl_buf index = params->index_buf; struct pl_buf_vk *index_vk = index ? 
PL_PRIV(index) : NULL; pl_assert(vert); // In the edge case that vert = index buffer, we need to synchronize // for both flags simultaneously VkAccessFlags vbo_flags = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; if (index == vert) vbo_flags |= VK_ACCESS_INDEX_READ_BIT; vk_buf_barrier(gpu, cmd, vert, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, vbo_flags, 0, vert->params.size, false); VkDeviceSize offset = vert_vk->mem.offset + params->buf_offset; vk->CmdBindVertexBuffers(cmd->buf, 0, 1, &vert_vk->mem.buf, &offset); if (index) { if (index != vert) { vk_buf_barrier(gpu, cmd, index, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_INDEX_READ_BIT, 0, index->params.size, false); } static const VkIndexType index_fmts[PL_INDEX_FORMAT_COUNT] = { [PL_INDEX_UINT16] = VK_INDEX_TYPE_UINT16, [PL_INDEX_UINT32] = VK_INDEX_TYPE_UINT32, }; vk->CmdBindIndexBuffer(cmd->buf, index_vk->mem.buf, index_vk->mem.offset + params->index_offset, index_fmts[params->index_fmt]); } vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, pass_vk->initialLayout, false); VkViewport viewport = { .x = params->viewport.x0, .y = params->viewport.y0, .width = pl_rect_w(params->viewport), .height = pl_rect_h(params->viewport), }; VkRect2D scissor = { .offset = {params->scissors.x0, params->scissors.y0}, .extent = {pl_rect_w(params->scissors), pl_rect_h(params->scissors)}, }; vk->CmdSetViewport(cmd->buf, 0, 1, &viewport); vk->CmdSetScissor(cmd->buf, 0, 1, &scissor); VkRenderPassBeginInfo binfo = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = pass_vk->renderPass, .framebuffer = tex_vk->framebuffer, .renderArea.extent = {tex->params.w, tex->params.h}, }; vk->CmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE); if (index) { vk->CmdDrawIndexed(cmd->buf, params->vertex_count, 1, 0, 0, 0); } else { vk->CmdDraw(cmd->buf, params->vertex_count, 1, 0, 0); } vk->CmdEndRenderPass(cmd->buf); // The renderPass implicitly transitions the texture to this layout tex_vk->layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; break; } case PL_PASS_COMPUTE: vk->CmdDispatch(cmd->buf, params->compute_groups[0], params->compute_groups[1], params->compute_groups[2]); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); }; for (int i = 0; i < pass->params.num_descriptors; i++) vk_release_descriptor(gpu, cmd, pass, params->desc_bindings[i], i); // submit this command buffer for better intra-frame granularity CMD_SUBMIT(&cmd); error: return; } libplacebo-v4.192.1/src/vulkan/gpu_tex.c000066400000000000000000001214501417677245700201070ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" void vk_tex_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_tex tex, VkPipelineStageFlags stage, VkAccessFlags access, VkImageLayout layout, bool export) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *tex_vk = PL_PRIV(tex); pl_rc_ref(&tex_vk->rc); pl_assert(!tex_vk->held); for (int i = 0; i < tex_vk->ext_deps.num; i++) vk_cmd_dep(cmd, stage, tex_vk->ext_deps.elem[i]); tex_vk->ext_deps.num = 0; // Transitioning to VK_IMAGE_LAYOUT_UNDEFINED is a pseudo-operation for // render passes, which means that we don't actually need to perform // any image layout transition if (layout == VK_IMAGE_LAYOUT_UNDEFINED) layout = tex_vk->layout; struct vk_sync_scope last; bool is_trans = layout != tex_vk->layout; last = vk_sem_barrier(vk, cmd, &tex_vk->sem, stage, access, is_trans || export); // CONCURRENT images require transitioning to/from IGNORED, EXCLUSIVE // images require transitioning to/from the concrete QF index uint32_t qf = vk->pools.num > 1 ? VK_QUEUE_FAMILY_IGNORED : cmd->pool->qf; VkImageMemoryBarrier barr = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .oldLayout = tex_vk->layout, .newLayout = layout, .srcQueueFamilyIndex = qf, .dstQueueFamilyIndex = export ? VK_QUEUE_FAMILY_EXTERNAL_KHR : qf, .srcAccessMask = last.access, .dstAccessMask = access, .image = tex_vk->img, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .levelCount = 1, .layerCount = 1, }, }; if (tex_vk->ext_sync) { if (tex_vk->layout != VK_IMAGE_LAYOUT_UNDEFINED) { barr.srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL_KHR; pl_assert(!export); // can't re-export exported images } vk_cmd_callback(cmd, (vk_cb) vk_sync_deref, gpu, tex_vk->ext_sync); tex_vk->ext_sync = NULL; } if (tex_vk->may_invalidate) { tex_vk->may_invalidate = false; barr.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; } bool is_xfer = barr.srcQueueFamilyIndex != barr.dstQueueFamilyIndex; if (last.access || is_trans || is_xfer) { vk->CmdPipelineBarrier(cmd->buf, last.stage, stage, 0, 0, NULL, 0, NULL, 1, &barr); } tex_vk->layout = layout; vk_cmd_callback(cmd, (vk_cb) vk_tex_deref, gpu, tex); } static void vk_tex_destroy(pl_gpu gpu, struct pl_tex *tex) { if (!tex) return; struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *tex_vk = PL_PRIV(tex); vk_sync_deref(gpu, tex_vk->ext_sync); vk_sem_uninit(vk, &tex_vk->sem); vk->DestroyFramebuffer(vk->dev, tex_vk->framebuffer, PL_VK_ALLOC); vk->DestroyImageView(vk->dev, tex_vk->view, PL_VK_ALLOC); if (!tex_vk->external_img) { vk->DestroyImage(vk->dev, tex_vk->img, PL_VK_ALLOC); vk_malloc_free(vk->ma, &tex_vk->mem); } pl_free(tex); } void vk_tex_deref(pl_gpu gpu, pl_tex tex) { if (!tex) return; struct pl_tex_vk *tex_vk = PL_PRIV(tex); if (pl_rc_deref(&tex_vk->rc)) vk_tex_destroy(gpu, (struct pl_tex *) tex); } // Initializes non-VkImage values like the image view, framebuffers, etc. 
static bool vk_init_image(pl_gpu gpu, pl_tex tex, pl_debug_tag debug_tag) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; const struct pl_tex_params *params = &tex->params; struct pl_tex_vk *tex_vk = PL_PRIV(tex); pl_assert(tex_vk->img); PL_VK_NAME(IMAGE, tex_vk->img, debug_tag); pl_rc_init(&tex_vk->rc); if (!vk_sem_init(vk, &tex_vk->sem, debug_tag)) return false; tex_vk->layout = VK_IMAGE_LAYOUT_UNDEFINED; tex_vk->transfer_queue = GRAPHICS; // Always use the transfer pool if available, for efficiency if ((params->host_writable || params->host_readable) && vk->pool_transfer) tex_vk->transfer_queue = TRANSFER; // For emulated formats: force usage of the compute queue, because we // can't properly track cross-queue dependencies for buffers (yet?) if (params->format->emulated) tex_vk->transfer_queue = COMPUTE; bool ret = false; VkRenderPass dummyPass = VK_NULL_HANDLE; if (params->sampleable || params->renderable || params->storable) { static const VkImageViewType viewType[] = { [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D, [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D, [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D, }; VkImageViewCreateInfo vinfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = tex_vk->img, .viewType = viewType[tex_vk->type], .format = tex_vk->img_fmt, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .levelCount = 1, .layerCount = 1, }, }; VK(vk->CreateImageView(vk->dev, &vinfo, PL_VK_ALLOC, &tex_vk->view)); PL_VK_NAME(IMAGE_VIEW, tex_vk->view, debug_tag); } if (params->renderable) { // Framebuffers need to be created against a specific render pass // layout, so we need to temporarily create a skeleton/dummy render // pass for vulkan to figure out the compatibility VkRenderPassCreateInfo rinfo = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .format = tex_vk->img_fmt, .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, }, .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .colorAttachmentCount = 1, .pColorAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, }, }, }; VK(vk->CreateRenderPass(vk->dev, &rinfo, PL_VK_ALLOC, &dummyPass)); VkFramebufferCreateInfo finfo = { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .renderPass = dummyPass, .attachmentCount = 1, .pAttachments = &tex_vk->view, .width = tex->params.w, .height = tex->params.h, .layers = 1, }; if (finfo.width > vk->limits.maxFramebufferWidth || finfo.height > vk->limits.maxFramebufferHeight) { PL_ERR(gpu, "Framebuffer of size %dx%d exceeds the maximum allowed " "dimensions: %dx%d", finfo.width, finfo.height, vk->limits.maxFramebufferWidth, vk->limits.maxFramebufferHeight); goto error; } VK(vk->CreateFramebuffer(vk->dev, &finfo, PL_VK_ALLOC, &tex_vk->framebuffer)); PL_VK_NAME(FRAMEBUFFER, tex_vk->framebuffer, debug_tag); } ret = true; error: vk->DestroyRenderPass(vk->dev, dummyPass, PL_VK_ALLOC); return ret; } pl_tex vk_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; enum pl_handle_type handle_type = params->export_handle | params->import_handle; struct pl_tex *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_vk); tex->params = *params; tex->params.initial_data 
= NULL; tex->sampler_type = PL_SAMPLER_NORMAL; struct pl_tex_vk *tex_vk = PL_PRIV(tex); struct pl_fmt_vk *fmtp = PL_PRIV(params->format); tex_vk->img_fmt = fmtp->vk_fmt->tfmt; switch (pl_tex_params_dimension(*params)) { case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break; case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break; case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break; } if (params->format->emulated) { tex_vk->texel_fmt = pl_find_fmt(gpu, params->format->type, 1, 0, params->format->host_bits[0], PL_FMT_CAP_TEXEL_UNIFORM); if (!tex_vk->texel_fmt) { PL_ERR(gpu, "Failed picking texel format for emulated texture!"); goto error; } // Statically check to see if we'd even be able to upload it at all // and refuse right away if not. In theory, uploading can still fail // based on the size of pl_tex_transfer_params.row_pitch, but for now // this should be enough. uint64_t texels = params->w * PL_DEF(params->h, 1) * PL_DEF(params->d, 1) * params->format->num_components; if (texels > gpu->limits.max_buffer_texels) { PL_ERR(gpu, "Failed creating texture with emulated texture format: " "texture dimensions exceed maximum texel buffer size! Try " "again with a different (non-emulated) format?"); goto error; } // Our format emulation requires storage image support. In order to // make a bunch of checks happy, just mark it off as storable (and also // enable VK_IMAGE_USAGE_STORAGE_BIT, which we do below) tex->params.storable = true; } if (fmtp->blit_emulated) { // Enable what's required for sampling tex->params.sampleable = params->format->caps & PL_FMT_CAP_SAMPLEABLE; tex->params.storable = true; } VkImageUsageFlags usage = 0; if (tex->params.sampleable) usage |= VK_IMAGE_USAGE_SAMPLED_BIT; if (tex->params.renderable) usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; if (tex->params.storable) usage |= VK_IMAGE_USAGE_STORAGE_BIT; if (tex->params.host_readable || tex->params.blit_src) usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (tex->params.host_writable || tex->params.blit_dst || params->initial_data) usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; if (!usage) { // Vulkan requires images have at least *some* image usage set, but our // API is perfectly happy with a (useless) image. So just put // VK_IMAGE_USAGE_TRANSFER_DST_BIT since this harmless. usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; } // FIXME: Since we can't keep track of queue family ownership properly, // and we don't know in advance what types of queue families this image // will belong to, we're forced to share all of our images between all // command pools. uint32_t qfs[3] = {0}; for (int i = 0; i < vk->pools.num; i++) qfs[i] = vk->pools.elem[i]->qf; VkImageDrmFormatModifierExplicitCreateInfoEXT drm_explicit = { .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT, .drmFormatModifier = params->shared_mem.drm_format_mod, .drmFormatModifierPlaneCount = 1, .pPlaneLayouts = &(VkSubresourceLayout) { .rowPitch = PL_DEF(params->shared_mem.stride_w, params->w), .depthPitch = params->d ? 
PL_DEF(params->shared_mem.stride_h, params->h) : 0, .offset = params->shared_mem.offset, }, }; VkImageDrmFormatModifierListCreateInfoEXT drm_list = { .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT, .drmFormatModifierCount = params->format->num_modifiers, .pDrmFormatModifiers = params->format->modifiers, }; VkExternalMemoryImageCreateInfoKHR ext_info = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR, .handleTypes = vk_mem_handle_type(handle_type), }; VkImageCreateInfo iinfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = handle_type ? &ext_info : NULL, .imageType = tex_vk->type, .format = tex_vk->img_fmt, .extent = (VkExtent3D) { .width = params->w, .height = PL_MAX(1, params->h), .depth = PL_MAX(1, params->d) }, .mipLevels = 1, .arrayLayers = 1, .samples = VK_SAMPLE_COUNT_1_BIT, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = usage, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .sharingMode = vk->pools.num > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = vk->pools.num, .pQueueFamilyIndices = qfs, }; bool has_drm_mods = vk->GetImageDrmFormatModifierPropertiesEXT; if (handle_type == PL_HANDLE_DMA_BUF && !has_drm_mods && !p->warned_modless) { PL_WARN(gpu, "Using legacy hacks for DMA buffers without modifiers. " "May result in corruption!"); p->warned_modless = true; } struct vk_malloc_params mparams = { .optimal = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, .export_handle = params->export_handle, .import_handle = params->import_handle, .shared_mem = params->shared_mem, }; if (params->import_handle == PL_HANDLE_DMA_BUF) { if (has_drm_mods) { // We have VK_EXT_image_drm_format_modifier, so we can use // format modifiers properly vk_link_struct(&iinfo, &drm_explicit); iinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; mparams.shared_mem.offset = 0x0; // handled via plane offsets } else { // Legacy fallback for older drivers. Based on hacks and guesswork. switch (drm_explicit.drmFormatModifier) { case DRM_FORMAT_MOD_LINEAR: iinfo.tiling = VK_IMAGE_TILING_LINEAR; break; } } } if (params->export_handle == PL_HANDLE_DMA_BUF && has_drm_mods) { vk_link_struct(&iinfo, &drm_list); iinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; } // Double-check physical image format limits and fail if invalid VkPhysicalDeviceImageDrmFormatModifierInfoEXT drm_pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, .drmFormatModifier = drm_explicit.drmFormatModifier, .sharingMode = iinfo.sharingMode, .queueFamilyIndexCount = iinfo.queueFamilyIndexCount, .pQueueFamilyIndices = iinfo.pQueueFamilyIndices, }; VkPhysicalDeviceExternalImageFormatInfoKHR ext_pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHR, .handleType = ext_info.handleTypes, }; if (handle_type == PL_HANDLE_DMA_BUF && has_drm_mods) vk_link_struct(&ext_pinfo, &drm_pinfo); VkPhysicalDeviceImageFormatInfo2KHR pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, .pNext = handle_type ? &ext_pinfo : NULL, .format = iinfo.format, .type = iinfo.imageType, .tiling = iinfo.tiling, .usage = iinfo.usage, .flags = iinfo.flags, }; VkExternalImageFormatPropertiesKHR ext_props = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, }; VkImageFormatProperties2KHR props = { .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, .pNext = handle_type ? 
&ext_props : NULL, }; VkResult res; res = vk->GetPhysicalDeviceImageFormatProperties2KHR(vk->physd, &pinfo, &props); if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) { goto error; } else { PL_VK_ASSERT(res, "Querying image format properties"); } VkExtent3D max = props.imageFormatProperties.maxExtent; if (params->w > max.width || params->h > max.height || params->d > max.depth) { PL_ERR(gpu, "Requested image size %dx%dx%d exceeds the maximum allowed " "dimensions %dx%dx%d for vulkan image format %x", params->w, params->h, params->d, max.width, max.height, max.depth, (unsigned) iinfo.format); goto error; } // Ensure the handle type is supported if (handle_type) { bool ok = vk_external_mem_check(vk, &ext_props.externalMemoryProperties, handle_type, params->import_handle); if (!ok) { PL_ERR(gpu, "Requested handle type is not compatible with the " "specified combination of image parameters. Possibly the " "handle type is unsupported altogether?"); goto error; } } VK(vk->CreateImage(vk->dev, &iinfo, PL_VK_ALLOC, &tex_vk->img)); tex_vk->usage_flags = iinfo.usage; VkMemoryDedicatedRequirements ded_reqs = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR, }; VkMemoryRequirements2 reqs = { .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR, .pNext = &ded_reqs, }; VkImageMemoryRequirementsInfo2 req_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR, .image = tex_vk->img, }; vk->GetImageMemoryRequirements2(vk->dev, &req_info, &reqs); mparams.reqs = reqs.memoryRequirements; if (ded_reqs.prefersDedicatedAllocation) { mparams.ded_image = tex_vk->img; if (params->import_handle) mparams.shared_mem.size = reqs.memoryRequirements.size; } struct vk_memslice *mem = &tex_vk->mem; if (!vk_malloc_slice(vk->ma, mem, &mparams)) goto error; const char *debug_tag = params->debug_tag ? params->debug_tag : params->import_handle ? "imported" : "created"; VK(vk->BindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset)); if (!vk_init_image(gpu, tex, debug_tag)) goto error; if (params->export_handle) tex->shared_mem = tex_vk->mem.shared_mem; if (params->export_handle == PL_HANDLE_DMA_BUF) { if (vk->GetImageDrmFormatModifierPropertiesEXT) { // Query the DRM format modifier and plane layout from the driver VkImageDrmFormatModifierPropertiesEXT mod_props = { .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, }; VK(vk->GetImageDrmFormatModifierPropertiesEXT(vk->dev, tex_vk->img, &mod_props)); tex->shared_mem.drm_format_mod = mod_props.drmFormatModifier; VkSubresourceLayout layout = {0}; VkImageSubresource plane = { .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT, }; vk->GetImageSubresourceLayout(vk->dev, tex_vk->img, &plane, &layout); if (layout.offset != 0) { PL_ERR(gpu, "Exported DRM plane 0 has nonzero offset %zu, " "this should never happen! Erroring for safety...", (size_t) layout.offset); goto error; } tex->shared_mem.stride_w = layout.rowPitch; tex->shared_mem.stride_h = layout.depthPitch; } else { // Fallback for no modifiers, just do something stupid. 
tex->shared_mem.drm_format_mod = DRM_FORMAT_MOD_INVALID; tex->shared_mem.stride_w = params->w; tex->shared_mem.stride_h = params->h; } } if (params->initial_data) { struct pl_tex_transfer_params ul_params = { .tex = tex, .ptr = (void *) params->initial_data, .rc = { 0, 0, 0, params->w, params->h, params->d }, }; // Since we re-use GPU helpers which require writable images, just fake it bool writable = tex->params.host_writable; tex->params.host_writable = true; if (!pl_tex_upload(gpu, &ul_params)) goto error; tex->params.host_writable = writable; } return tex; error: vk_tex_destroy(gpu, tex); return NULL; } void vk_tex_invalidate(pl_gpu gpu, pl_tex tex) { struct pl_tex_vk *tex_vk = PL_PRIV(tex); tex_vk->may_invalidate = true; } void vk_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *tex_vk = PL_PRIV(tex); struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS); if (!cmd) return; vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, false); pl_static_assert(sizeof(VkClearColorValue) == sizeof(union pl_clear_color)); const VkClearColorValue *clearColor = (const VkClearColorValue *) &color; static const VkImageSubresourceRange range = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .levelCount = 1, .layerCount = 1, }; vk->CmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->layout, clearColor, 1, &range); CMD_FINISH(&cmd); } void vk_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *src_vk = PL_PRIV(params->src); struct pl_tex_vk *dst_vk = PL_PRIV(params->dst); struct pl_fmt_vk *src_fmtp = PL_PRIV(params->src->params.format); struct pl_fmt_vk *dst_fmtp = PL_PRIV(params->dst->params.format); bool blit_emulated = src_fmtp->blit_emulated || dst_fmtp->blit_emulated; struct pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc; bool requires_scaling = !pl_rect3d_eq(src_rc, dst_rc); if (requires_scaling && blit_emulated) { if (!pl_tex_blit_compute(gpu, p->dp, params)) PL_ERR(gpu, "Failed emulating texture blit, incompatible textures?"); return; } struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS); if (!cmd) return; vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, false); vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, false); static const VkImageSubresourceLayers layers = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .layerCount = 1, }; // When the blit operation doesn't require scaling, we can use the more // efficient vkCmdCopyImage instead of vkCmdBlitImage if (!requires_scaling) { pl_rect3d_normalize(&src_rc); VkImageCopy region = { .srcSubresource = layers, .dstSubresource = layers, .srcOffset = {src_rc.x0, src_rc.y0, src_rc.z0}, .dstOffset = {src_rc.x0, src_rc.y0, src_rc.z0}, .extent = { pl_rect_w(src_rc), pl_rect_h(src_rc), pl_rect_d(src_rc), }, }; vk->CmdCopyImage(cmd->buf, src_vk->img, src_vk->layout, dst_vk->img, dst_vk->layout, 1, ®ion); } else { VkImageBlit region = { .srcSubresource = layers, .dstSubresource = layers, .srcOffsets = {{src_rc.x0, src_rc.y0, src_rc.z0}, {src_rc.x1, src_rc.y1, src_rc.z1}}, .dstOffsets = {{dst_rc.x0, dst_rc.y0, dst_rc.z0}, {dst_rc.x1, dst_rc.y1, dst_rc.z1}}, }; static const VkFilter filters[PL_TEX_SAMPLE_MODE_COUNT] = { [PL_TEX_SAMPLE_NEAREST] = 
VK_FILTER_NEAREST, [PL_TEX_SAMPLE_LINEAR] = VK_FILTER_LINEAR, }; vk->CmdBlitImage(cmd->buf, src_vk->img, src_vk->layout, dst_vk->img, dst_vk->layout, 1, ®ion, filters[params->sample_mode]); } CMD_FINISH(&cmd); } // Determine the best queue type to perform a buffer<->image copy on static enum queue_type vk_img_copy_queue(pl_gpu gpu, pl_tex tex, const struct VkBufferImageCopy *region) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; const struct pl_tex_vk *tex_vk = PL_PRIV(tex); enum queue_type queue = tex_vk->transfer_queue; if (queue != TRANSFER || !vk->pool_transfer) return queue; VkExtent3D alignment = vk->pool_transfer->props.minImageTransferGranularity; enum queue_type fallback = GRAPHICS; if (gpu->limits.compute_queues > gpu->limits.fragment_queues) fallback = COMPUTE; // prefer async compute queue int tex_w = PL_DEF(tex->params.w, 1), tex_h = PL_DEF(tex->params.h, 1), tex_d = PL_DEF(tex->params.d, 1); bool full_w = region->imageOffset.x + region->imageExtent.width == tex_w, full_h = region->imageOffset.y + region->imageExtent.height == tex_h, full_d = region->imageOffset.z + region->imageExtent.depth == tex_d; if (alignment.width) { bool unaligned = false; unaligned |= region->imageOffset.x % alignment.width; unaligned |= region->imageOffset.y % alignment.height; unaligned |= region->imageOffset.z % alignment.depth; unaligned |= (region->imageExtent.width % alignment.width) && !full_w; unaligned |= (region->imageExtent.height % alignment.height) && !full_h; unaligned |= (region->imageExtent.depth % alignment.depth) && !full_d; return unaligned ? fallback : queue; } else { // an alignment of {0} means the copy must span the entire image bool unaligned = false; unaligned |= region->imageOffset.x || !full_w; unaligned |= region->imageOffset.y || !full_h; unaligned |= region->imageOffset.z || !full_d; return unaligned ? 
fallback : queue; } } static void tex_xfer_cb(void *ctx, void *arg) { void (*fun)(void *priv) = ctx; fun(arg); } bool vk_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; struct pl_tex_vk *tex_vk = PL_PRIV(tex); if (!params->buf) return pl_tex_upload_pbo(gpu, params); pl_buf buf = params->buf; struct pl_buf_vk *buf_vk = PL_PRIV(buf); struct pl_rect3d rc = params->rc; size_t size = pl_tex_transfer_size(params); size_t buf_offset = buf_vk->mem.offset + params->buf_offset; bool unaligned = buf_offset % fmt->texel_size; if (unaligned) PL_TRACE(gpu, "vk_tex_upload: unaligned transfer (slow path)"); if (fmt->emulated || unaligned) { bool ubo; if (fmt->emulated) { if (size <= gpu->limits.max_ubo_size) { ubo = true; } else if (size <= gpu->limits.max_ssbo_size) { ubo = false; } else { // TODO: Implement strided upload path if really necessary PL_ERR(gpu, "Texel buffer size requirements exceed GPU " "capabilities, failed uploading!"); goto error; } } // Copy the source data buffer into an intermediate buffer pl_buf tbuf = pl_buf_create(gpu, pl_buf_params( .uniform = fmt->emulated && ubo, .storable = fmt->emulated && !ubo, .size = size, .memory_type = PL_BUF_MEM_DEVICE, .format = tex_vk->texel_fmt, )); if (!tbuf) { PL_ERR(gpu, "Failed creating buffer for tex upload fallback!"); goto error; } struct vk_cmd *cmd = CMD_BEGIN_TIMED(tex_vk->transfer_queue, params->timer); if (!cmd) goto error; struct pl_buf_vk *tbuf_vk = PL_PRIV(tbuf); VkBufferCopy region = { .srcOffset = buf_offset, .dstOffset = tbuf_vk->mem.offset, .size = size, }; vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, params->buf_offset, size, false); vk_buf_barrier(gpu, cmd, tbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 0, size, false); vk->CmdCopyBuffer(cmd->buf, buf_vk->mem.buf, tbuf_vk->mem.buf, 1, ®ion); if (params->callback) vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv); CMD_FINISH(&cmd); struct pl_tex_transfer_params fixed = *params; fixed.buf = tbuf; fixed.buf_offset = 0; bool ok = fmt->emulated ? 
pl_tex_upload_texel(gpu, p->dp, &fixed) : pl_tex_upload(gpu, &fixed); pl_buf_destroy(gpu, &tbuf); return ok; } else { pl_assert(fmt->texel_align == fmt->texel_size); VkBufferImageCopy region = { .bufferOffset = buf_offset, .bufferRowLength = params->row_pitch / fmt->texel_size, .bufferImageHeight = params->depth_pitch / params->row_pitch, .imageOffset = { rc.x0, rc.y0, rc.z0 }, .imageExtent = { rc.x1, rc.y1, rc.z1 }, .imageSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .layerCount = 1, }, }; enum queue_type queue = vk_img_copy_queue(gpu, tex, ®ion); struct vk_cmd *cmd = CMD_BEGIN_TIMED(queue, params->timer); if (!cmd) goto error; vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, params->buf_offset, size, false); vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, false); vk->CmdCopyBufferToImage(cmd->buf, buf_vk->mem.buf, tex_vk->img, tex_vk->layout, 1, ®ion); if (params->callback) vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv); CMD_FINISH(&cmd); } return true; error: return false; } bool vk_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; struct pl_tex_vk *tex_vk = PL_PRIV(tex); if (!params->buf) return pl_tex_download_pbo(gpu, params); pl_buf buf = params->buf; struct pl_buf_vk *buf_vk = PL_PRIV(buf); struct pl_rect3d rc = params->rc; size_t size = pl_tex_transfer_size(params); size_t buf_offset = buf_vk->mem.offset + params->buf_offset; bool unaligned = buf_offset % fmt->texel_size; if (unaligned) PL_TRACE(gpu, "vk_tex_download: unaligned transfer (slow path)"); if (fmt->emulated || unaligned) { // Download into an intermediate buffer first pl_buf tbuf = pl_buf_create(gpu, pl_buf_params( .storable = fmt->emulated, .size = size, .memory_type = PL_BUF_MEM_DEVICE, .format = tex_vk->texel_fmt, )); if (!tbuf) { PL_ERR(gpu, "Failed creating buffer for tex download fallback!"); goto error; } struct pl_tex_transfer_params fixed = *params; fixed.buf = tbuf; fixed.buf_offset = 0; bool ok = fmt->emulated ? 
pl_tex_download_texel(gpu, p->dp, &fixed) : pl_tex_download(gpu, &fixed); if (!ok) { pl_buf_destroy(gpu, &tbuf); goto error; } struct vk_cmd *cmd = CMD_BEGIN_TIMED(tex_vk->transfer_queue, params->timer); if (!cmd) { pl_buf_destroy(gpu, &tbuf); goto error; } struct pl_buf_vk *tbuf_vk = PL_PRIV(tbuf); VkBufferCopy region = { .srcOffset = tbuf_vk->mem.offset, .dstOffset = buf_offset, .size = size, }; vk_buf_barrier(gpu, cmd, tbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0, size, false); vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, params->buf_offset, size, false); vk->CmdCopyBuffer(cmd->buf, tbuf_vk->mem.buf, buf_vk->mem.buf, 1, ®ion); vk_buf_flush(gpu, cmd, buf, params->buf_offset, size); if (params->callback) vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv); CMD_FINISH(&cmd); pl_buf_destroy(gpu, &tbuf); } else { pl_assert(fmt->texel_align == fmt->texel_size); VkBufferImageCopy region = { .bufferOffset = buf_offset, .bufferRowLength = params->row_pitch / fmt->texel_size, .bufferImageHeight = params->depth_pitch / params->row_pitch, .imageOffset = { rc.x0, rc.y0, rc.z0 }, .imageExtent = { rc.x1, rc.y1, rc.z1 }, .imageSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .layerCount = 1, }, }; enum queue_type queue = vk_img_copy_queue(gpu, tex, ®ion); struct vk_cmd *cmd = CMD_BEGIN_TIMED(queue, params->timer); if (!cmd) goto error; vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, params->buf_offset, size, false); vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, false); vk->CmdCopyImageToBuffer(cmd->buf, tex_vk->img, tex_vk->layout, buf_vk->mem.buf, 1, ®ion); vk_buf_flush(gpu, cmd, buf, params->buf_offset, size); if (params->callback) vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv); CMD_FINISH(&cmd); } return true; error: return false; } bool vk_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *tex_vk = PL_PRIV(tex); // Opportunistically check if we can re-use this texture without flush vk_poll_commands(vk, 0); if (pl_rc_count(&tex_vk->rc) == 1) return false; // Otherwise, we're force to submit any queued command so that the user is // guaranteed to see progress eventually, even if they call this in a loop CMD_SUBMIT(NULL); vk_poll_commands(vk, timeout); return pl_rc_count(&tex_vk->rc) > 1; } bool vk_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync) { struct pl_tex_vk *tex_vk = PL_PRIV(tex); struct pl_sync_vk *sync_vk = PL_PRIV(sync); struct vk_cmd *cmd = CMD_BEGIN(ANY); if (!cmd) goto error; vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, VK_IMAGE_LAYOUT_GENERAL, true); // Make the next barrier appear as though coming from a different queue tex_vk->sem.write.queue = tex_vk->sem.read.queue = NULL; vk_cmd_sig(cmd, (pl_vulkan_sem){ sync_vk->wait }); if (!CMD_SUBMIT(&cmd)) goto error; // Remember the other dependency and hold on to the sync object PL_ARRAY_APPEND(tex, tex_vk->ext_deps, (pl_vulkan_sem){ sync_vk->signal }); pl_rc_ref(&sync_vk->rc); tex_vk->ext_sync = sync; return true; error: PL_ERR(gpu, "Failed exporting shared texture!"); return false; } pl_tex pl_vulkan_wrap(pl_gpu gpu, const struct pl_vulkan_wrap_params *params) { pl_fmt format = NULL; for (int i = 0; i < gpu->num_formats; i++) { const struct vk_format **fmt = PL_PRIV(gpu->formats[i]); if ((*fmt)->tfmt 
== params->format) { format = gpu->formats[i]; break; } } if (!format) { PL_ERR(gpu, "Could not find pl_fmt suitable for wrapped image " "with format %s", vk_fmt_name(params->format)); return NULL; } struct pl_tex *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_vk); tex->params = (struct pl_tex_params) { .format = format, .w = params->width, .h = params->height, .d = params->depth, .sampleable = !!(params->usage & VK_IMAGE_USAGE_SAMPLED_BIT), .renderable = !!(params->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT), .storable = !!(params->usage & VK_IMAGE_USAGE_STORAGE_BIT), .blit_src = !!(params->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT), .blit_dst = !!(params->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT), .host_writable = !!(params->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT), .host_readable = !!(params->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT), .user_data = params->user_data, .debug_tag = params->debug_tag, }; // Mask out capabilities not permitted by the `pl_fmt` #define MASK(field, cap) \ do { \ if (tex->params.field && !(format->caps & cap)) { \ PL_WARN(gpu, "Masking `" #field "` from wrapped texture because " \ "the corresponding format '%s' does not support " #cap, \ format->name); \ tex->params.field = false; \ } \ } while (0) MASK(sampleable, PL_FMT_CAP_SAMPLEABLE); MASK(storable, PL_FMT_CAP_STORABLE); MASK(blit_src, PL_FMT_CAP_BLITTABLE); MASK(blit_dst, PL_FMT_CAP_BLITTABLE); #undef MASK // For simplicity, explicitly mask out blit emulation for wrapped textures struct pl_fmt_vk *fmtp = PL_PRIV(format); if (fmtp->blit_emulated) { tex->params.blit_src = false; tex->params.blit_dst = false; } struct pl_tex_vk *tex_vk = PL_PRIV(tex); tex_vk->type = VK_IMAGE_TYPE_2D; tex_vk->external_img = true; tex_vk->held = true; tex_vk->img = params->image; tex_vk->img_fmt = params->format; tex_vk->usage_flags = params->usage; if (!vk_init_image(gpu, tex, PL_DEF(params->debug_tag, "wrapped"))) goto error; return tex; error: vk_tex_destroy(gpu, tex); return NULL; } VkImage pl_vulkan_unwrap(pl_gpu gpu, pl_tex tex, VkFormat *out_format, VkImageUsageFlags *out_flags) { struct pl_tex_vk *tex_vk = PL_PRIV(tex); if (out_format) *out_format = tex_vk->img_fmt; if (out_flags) *out_flags = tex_vk->usage_flags; return tex_vk->img; } bool pl_vulkan_hold(pl_gpu gpu, pl_tex tex, VkImageLayout layout, pl_vulkan_sem sem_out) { struct pl_tex_vk *tex_vk = PL_PRIV(tex); pl_assert(sem_out.sem); if (tex_vk->held) { PL_ERR(gpu, "Attempting to hold an already held image!"); return false; } struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS); if (!cmd) { PL_ERR(gpu, "Failed holding external image!"); return false; } vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, layout, false); vk_cmd_sig(cmd, sem_out); tex_vk->sem.write.queue = tex_vk->sem.read.queue = NULL; tex_vk->held = CMD_SUBMIT(&cmd); return tex_vk->held; } bool pl_vulkan_hold_raw(pl_gpu gpu, pl_tex tex, VkImageLayout *layout, pl_vulkan_sem sem_out) { struct pl_tex_vk *tex_vk = PL_PRIV(tex); bool user_may_invalidate = tex_vk->may_invalidate; if (!pl_vulkan_hold(gpu, tex, tex_vk->layout, sem_out)) return false; if (user_may_invalidate) { *layout = VK_IMAGE_LAYOUT_UNDEFINED; } else { *layout = tex_vk->layout; } return true; } void pl_vulkan_release(pl_gpu gpu, pl_tex tex, VkImageLayout layout, pl_vulkan_sem sem_in) { struct pl_tex_vk *tex_vk = PL_PRIV(tex); if (!tex_vk->held) { PL_ERR(gpu, "Attempting to release an unheld image?"); return; } if (sem_in.sem) PL_ARRAY_APPEND(tex, tex_vk->ext_deps, sem_in); tex_vk->layout = layout; tex_vk->held = false; } 
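// ---------------------------------------------------------------------------
// Illustrative sketch (not compiled into the library): how the wrap/hold/
// release API above is typically combined for Vulkan interop. The VkImage,
// its format/size/usage and the externally created VkSemaphore `interop_sem`
// are hypothetical placeholders supplied by the caller's own Vulkan code.
#if 0
static void interop_example(pl_gpu gpu, VkImage image, VkSemaphore interop_sem)
{
    // Wrap the externally allocated VkImage into a pl_tex. The usage flags
    // must match those the image was actually created with.
    pl_tex tex = pl_vulkan_wrap(gpu, &(struct pl_vulkan_wrap_params) {
        .image  = image,
        .format = VK_FORMAT_R8G8B8A8_UNORM,
        .width  = 1280,
        .height = 720,
        .usage  = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
    });
    if (!tex)
        return;

    // Transfer ownership to the caller: once `interop_sem` fires, the image
    // is in TRANSFER_DST_OPTIMAL and may be touched by foreign Vulkan code.
    if (!pl_vulkan_hold(gpu, tex, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                        (pl_vulkan_sem) { .sem = interop_sem }))
        goto done;

    // ... submit the caller's own Vulkan work here, waiting on `interop_sem`
    // and signalling it again when finished ...

    // Hand the image back, in whatever layout the foreign code left it in;
    // subsequent pl_tex_* operations will wait on the semaphore first.
    pl_vulkan_release(gpu, tex, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                      (pl_vulkan_sem) { .sem = interop_sem });

done:
    pl_tex_destroy(gpu, &tex);
}
#endif
// ---------------------------------------------------------------------------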
libplacebo-v4.192.1/src/vulkan/malloc.c000066400000000000000000001017211417677245700177020ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "malloc.h" #include "command.h" #include "utils.h" #include "pl_thread.h" #ifdef PL_HAVE_UNIX #include #include #include #endif // Controls the page size alignment, to help coalesce allocations into the same // slab. Pages are rounded up to multiples of this value. (Default: 4 KB) #define PAGE_SIZE_ALIGN (1LLU << 12) // Controls the minimum/maximum number of pages for new slabs. As slabs are // exhausted of memory, the number of pages per new slab grows exponentially, // starting with the minimum until the maximum is reached. // // Note: The maximum must never exceed the size of `vk_slab.spacemap`. #define MINIMUM_PAGE_COUNT 4 #define MAXIMUM_PAGE_COUNT (sizeof(uint64_t) * 8) // Controls the maximum page size. Any allocations above this threshold // will be served by dedicated allocations. (Default: 64 MB) #define MAXIMUM_PAGE_SIZE (1LLU << 26) // Controls the minimum slab size, to avoid excessive re-allocation of very // small slabs. (Default: 256 KB) #define MINIMUM_SLAB_SIZE (1LLU << 18) // Controls the maximum slab size, to avoid ballooning memory requirements // due to overzealous allocation of extra pages. (Default: 256 MB) #define MAXIMUM_SLAB_SIZE (1LLU << 28) // How long to wait before garbage collecting empty slabs. Slabs older than // this many invocations of `vk_malloc_garbage_collect` will be released. #define MAXIMUM_SLAB_AGE 8 // A single slab represents a contiguous region of allocated memory. Actual // allocations are served as pages of this. Slabs are organized into pools, // each of which contains a list of slabs of differing page sizes. struct vk_slab { pl_mutex lock; VkDeviceMemory mem; // underlying device allocation VkDeviceSize size; // total allocated size of `mem` VkMemoryType mtype; // underlying memory type bool dedicated; // slab is allocated specifically for one object bool imported; // slab represents an imported memory allocation // free space accounting (only for non-dedicated slabs) uint64_t spacemap; // bitset of available pages size_t pagesize; // size in bytes per page size_t used; // number of bytes actually in use uint64_t age; // timestamp of last use // optional, depends on the memory type: VkBuffer buffer; // buffer spanning the entire slab void *data; // mapped memory corresponding to `mem` bool coherent; // mapped memory is coherent union pl_handle handle; // handle associated with this device memory enum pl_handle_type handle_type; }; // Represents a single memory pool. We keep track of a vk_pool for each // combination of malloc parameters. This shouldn't actually be that many in // practice, because some combinations simply never occur, and others will // generally be the same for the same objects. 
// // Note: `vk_pool` addresses are not immutable, so we mustn't expose any // dangling references to a `vk_pool` from e.g. `vk_memslice.priv = vk_slab`. struct vk_pool { struct vk_malloc_params params; // allocation params (with some fields nulled) PL_ARRAY(struct vk_slab *) slabs; // array of slabs, unsorted int index; // running index in `vk_malloc.pools` }; // The overall state of the allocator, which keeps track of a vk_pool for each // memory type. struct vk_malloc { struct vk_ctx *vk; pl_mutex lock; VkPhysicalDeviceMemoryProperties props; PL_ARRAY(struct vk_pool) pools; uint64_t age; }; static inline float efficiency(size_t used, size_t total) { if (!total) return 100.0; return 100.0f * used / total; } static const char *print_size(char buf[8], size_t size) { const char *suffixes = "\0KMG"; while (suffixes[1] && size > 9999) { size >>= 10; suffixes++; } int ret = *suffixes ? snprintf(buf, 8, "%4zu%c", size, *suffixes) : snprintf(buf, 8, "%5zu", size); return ret >= 0 ? buf : "(error)"; } #define PRINT_SIZE(x) (print_size((char[8]){0}, (size_t) (x))) void vk_malloc_print_stats(struct vk_malloc *ma, enum pl_log_level lev) { struct vk_ctx *vk = ma->vk; size_t total_size = 0; size_t total_used = 0; size_t total_res = 0; PL_MSG(vk, lev, "Memory heaps supported by device:"); for (int i = 0; i < ma->props.memoryHeapCount; i++) { VkMemoryHeap heap = ma->props.memoryHeaps[i]; PL_MSG(vk, lev, " %d: flags 0x%x size %s", i, (unsigned) heap.flags, PRINT_SIZE(heap.size)); } PL_DEBUG(vk, "Memory types supported by device:"); for (int i = 0; i < ma->props.memoryTypeCount; i++) { VkMemoryType type = ma->props.memoryTypes[i]; PL_DEBUG(vk, " %d: flags 0x%x heap %d", i, (unsigned) type.propertyFlags, (int) type.heapIndex); } pl_mutex_lock(&ma->lock); for (int i = 0; i < ma->pools.num; i++) { struct vk_pool *pool = &ma->pools.elem[i]; const struct vk_malloc_params *par = &pool->params; PL_MSG(vk, lev, "Memory pool %d:", i); PL_MSG(vk, lev, " Compatible types: 0x%"PRIx32, par->reqs.memoryTypeBits); if (par->required) PL_MSG(vk, lev, " Required flags: 0x%"PRIx32, par->required); if (par->optimal) PL_MSG(vk, lev, " Optimal flags: 0x%"PRIx32, par->optimal); if (par->buf_usage) PL_MSG(vk, lev, " Buffer flags: 0x%"PRIx32, par->buf_usage); if (par->export_handle) PL_MSG(vk, lev, " Export handle: 0x%x", par->export_handle); size_t pool_size = 0; size_t pool_used = 0; size_t pool_res = 0; for (int j = 0; j < pool->slabs.num; j++) { struct vk_slab *slab = pool->slabs.elem[j]; pl_mutex_lock(&slab->lock); size_t avail = __builtin_popcountll(slab->spacemap) * slab->pagesize; size_t slab_res = slab->size - avail; PL_MSG(vk, lev, " Slab %2d: %8"PRIx64" x %s: " "%s used %s res %s alloc from heap %d, efficiency %.2f%%", j, slab->spacemap, PRINT_SIZE(slab->pagesize), PRINT_SIZE(slab->used), PRINT_SIZE(slab_res), PRINT_SIZE(slab->size), (int) slab->mtype.heapIndex, efficiency(slab->used, slab_res)); pool_size += slab->size; pool_used += slab->used; pool_res += slab_res; pl_mutex_unlock(&slab->lock); } PL_MSG(vk, lev, " Pool summary: %s used %s res %s alloc, " "efficiency %.2f%%, utilization %.2f%%", PRINT_SIZE(pool_used), PRINT_SIZE(pool_res), PRINT_SIZE(pool_size), efficiency(pool_used, pool_res), efficiency(pool_res, pool_size)); total_size += pool_size; total_used += pool_used; total_res += pool_res; } pl_mutex_unlock(&ma->lock); PL_MSG(vk, lev, "Memory summary: %s used %s res %s alloc, " "efficiency %.2f%%, utilization %.2f%%", PRINT_SIZE(total_used), PRINT_SIZE(total_res), PRINT_SIZE(total_size), 
efficiency(total_used, total_res), efficiency(total_res, total_size)); } static void slab_free(struct vk_ctx *vk, struct vk_slab *slab) { if (!slab) return; #ifndef NDEBUG if (!slab->dedicated && slab->used > 0) { fprintf(stderr, "!!! libplacebo: leaked %zu bytes of vulkan memory\n" "!!! slab total size: %zu bytes, heap: %d, flags: 0x%"PRIX64"\n", slab->used, (size_t) slab->size, (int) slab->mtype.heapIndex, (uint64_t) slab->mtype.propertyFlags); } #endif if (slab->imported) { switch (slab->handle_type) { case PL_HANDLE_FD: case PL_HANDLE_DMA_BUF: PL_TRACE(vk, "Unimporting slab of size %s from fd: %d", PRINT_SIZE(slab->size), slab->handle.fd); break; case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: #ifdef PL_HAVE_WIN32 PL_TRACE(vk, "Unimporting slab of size %s from handle: %p", PRINT_SIZE(slab->size), (void *) slab->handle.handle); #endif break; case PL_HANDLE_HOST_PTR: PL_TRACE(vk, "Unimporting slab of size %s from ptr: %p", PRINT_SIZE(slab->size), (void *) slab->handle.ptr); break; } } else { switch (slab->handle_type) { case PL_HANDLE_FD: case PL_HANDLE_DMA_BUF: #ifdef PL_HAVE_UNIX if (slab->handle.fd > -1) close(slab->handle.fd); #endif break; case PL_HANDLE_WIN32: #ifdef PL_HAVE_WIN32 if (slab->handle.handle != NULL) CloseHandle(slab->handle.handle); #endif break; case PL_HANDLE_WIN32_KMT: // PL_HANDLE_WIN32_KMT is just an identifier. It doesn't get closed. break; case PL_HANDLE_HOST_PTR: // Implicitly unmapped break; } PL_DEBUG(vk, "Freeing slab of size %s", PRINT_SIZE(slab->size)); } vk->DestroyBuffer(vk->dev, slab->buffer, PL_VK_ALLOC); // also implicitly unmaps the memory if needed vk->FreeMemory(vk->dev, slab->mem, PL_VK_ALLOC); pl_mutex_destroy(&slab->lock); pl_free(slab); } // type_mask: optional // thread-safety: safe static bool find_best_memtype(const struct vk_malloc *ma, uint32_t type_mask, const struct vk_malloc_params *params, uint32_t *out_index) { struct vk_ctx *vk = ma->vk; int best = -1; // The vulkan spec requires memory types to be sorted in the "optimal" // order, so the first matching type we find will be the best/fastest one. // That being said, we still want to prioritize memory types that have // better optional flags. 
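// For illustration (hypothetical flag sets, abbreviating the
// VK_MEMORY_PROPERTY_* names): if `optimal` requests DEVICE_LOCAL |
// HOST_VISIBLE | HOST_CACHED, then a memory type advertising DEVICE_LOCAL |
// HOST_VISIBLE matches two of the optional bits and scores popcount = 2,
// whereas a type advertising only DEVICE_LOCAL scores 1, so the former is
// picked even when both satisfy the `required` flags.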
type_mask &= params->reqs.memoryTypeBits; for (int i = 0; i < ma->props.memoryTypeCount; i++) { const VkMemoryType *mtype = &ma->props.memoryTypes[i]; // The memory type flags must include our properties if ((mtype->propertyFlags & params->required) != params->required) continue; // The memory heap must be large enough for the allocation VkDeviceSize heapSize = ma->props.memoryHeaps[mtype->heapIndex].size; if (params->reqs.size > heapSize) continue; // The memory type must be supported by the type mask (bitfield) if (!(type_mask & (1LU << i))) continue; // Calculate the score as the number of optimal property flags matched int score = __builtin_popcountl(mtype->propertyFlags & params->optimal); if (score > best) { *out_index = i; best = score; } } if (best < 0) { PL_ERR(vk, "Found no memory type matching property flags 0x%x and type " "bits 0x%x!", (unsigned) params->required, (unsigned) type_mask); return false; } return true; } static bool buf_external_check(struct vk_ctx *vk, VkBufferUsageFlags usage, enum pl_handle_type handle_type, bool import) { if (!handle_type) return true; VkPhysicalDeviceExternalBufferInfo info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO_KHR, .usage = usage, .handleType = vk_mem_handle_type(handle_type), }; VkExternalBufferProperties props = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES_KHR, }; pl_assert(info.handleType); vk->GetPhysicalDeviceExternalBufferProperties(vk->physd, &info, &props); return vk_external_mem_check(vk, &props.externalMemoryProperties, handle_type, import); } // thread-safety: safe static struct vk_slab *slab_alloc(struct vk_malloc *ma, const struct vk_malloc_params *params) { struct vk_ctx *vk = ma->vk; struct vk_slab *slab = pl_alloc_ptr(NULL, slab); *slab = (struct vk_slab) { .age = ma->age, .size = params->reqs.size, .handle_type = params->export_handle, }; pl_mutex_init(&slab->lock); switch (slab->handle_type) { case PL_HANDLE_FD: case PL_HANDLE_DMA_BUF: slab->handle.fd = -1; break; case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: slab->handle.handle = NULL; break; case PL_HANDLE_HOST_PTR: slab->handle.ptr = NULL; break; } VkExportMemoryAllocateInfoKHR ext_info = { .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR, .handleTypes = vk_mem_handle_type(slab->handle_type), }; uint32_t type_mask = UINT32_MAX; if (params->buf_usage) { // Queue family sharing modes don't matter for buffers, so we just // set them as concurrent and stop worrying about it. uint32_t qfs[3] = {0}; for (int i = 0; i < vk->pools.num; i++) qfs[i] = vk->pools.elem[i]->qf; VkExternalMemoryBufferCreateInfoKHR ext_buf_info = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR, .handleTypes = ext_info.handleTypes, }; VkBufferCreateInfo binfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = slab->handle_type ? &ext_buf_info : NULL, .size = slab->size, .usage = params->buf_usage, .sharingMode = vk->pools.num > 1 ? 
VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = vk->pools.num, .pQueueFamilyIndices = qfs, }; if (!buf_external_check(vk, binfo.usage, slab->handle_type, false)) { PL_ERR(vk, "Failed allocating shared memory buffer: possibly " "the handle type is unsupported?"); goto error; } VK(vk->CreateBuffer(vk->dev, &binfo, PL_VK_ALLOC, &slab->buffer)); PL_VK_NAME(BUFFER, slab->buffer, "slab"); VkMemoryRequirements reqs = {0}; vk->GetBufferMemoryRequirements(vk->dev, slab->buffer, &reqs); slab->size = reqs.size; // this can be larger than `slab->size` type_mask = reqs.memoryTypeBits; // Note: we can ignore `reqs.align` because we always bind the buffer // memory to offset 0 } VkMemoryAllocateInfo minfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = slab->size, }; if (params->export_handle) vk_link_struct(&minfo, &ext_info); VkMemoryDedicatedAllocateInfoKHR dinfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, .image = params->ded_image, }; if (params->ded_image) vk_link_struct(&minfo, &dinfo); if (!find_best_memtype(ma, type_mask, params, &minfo.memoryTypeIndex)) goto error; const VkMemoryType *mtype = &ma->props.memoryTypes[minfo.memoryTypeIndex]; PL_DEBUG(vk, "Allocating %zu memory of type 0x%x (id %d) in heap %d", (size_t) slab->size, (unsigned) mtype->propertyFlags, (int) minfo.memoryTypeIndex, (int) mtype->heapIndex); VkResult res = vk->AllocateMemory(vk->dev, &minfo, PL_VK_ALLOC, &slab->mem); switch (res) { case VK_ERROR_OUT_OF_DEVICE_MEMORY: case VK_ERROR_OUT_OF_HOST_MEMORY: PL_ERR(vk, "Allocation of size %s failed: %s!", PRINT_SIZE(slab->size), vk_res_str(res)); vk_malloc_print_stats(ma, PL_LOG_ERR); pl_log_stack_trace(vk->log, PL_LOG_ERR); goto error; default: PL_VK_ASSERT(res, "vkAllocateMemory"); } slab->mtype = *mtype; if (mtype->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { VK(vk->MapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data)); slab->coherent = mtype->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; } if (slab->buffer) VK(vk->BindBufferMemory(vk->dev, slab->buffer, slab->mem, 0)); #ifdef PL_HAVE_UNIX if (slab->handle_type == PL_HANDLE_FD || slab->handle_type == PL_HANDLE_DMA_BUF) { VkMemoryGetFdInfoKHR fd_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, .memory = slab->mem, .handleType = ext_info.handleTypes, }; VK(vk->GetMemoryFdKHR(vk->dev, &fd_info, &slab->handle.fd)); } #endif #ifdef PL_HAVE_WIN32 if (slab->handle_type == PL_HANDLE_WIN32 || slab->handle_type == PL_HANDLE_WIN32_KMT) { VkMemoryGetWin32HandleInfoKHR handle_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, .memory = slab->mem, .handleType = ext_info.handleTypes, }; VK(vk->GetMemoryWin32HandleKHR(vk->dev, &handle_info, &slab->handle.handle)); } #endif // free space accounting is done by the caller return slab; error: slab_free(vk, slab); return NULL; } static void pool_uninit(struct vk_ctx *vk, struct vk_pool *pool) { for (int i = 0; i < pool->slabs.num; i++) slab_free(vk, pool->slabs.elem[i]); pl_free(pool->slabs.elem); *pool = (struct vk_pool) {0}; } struct vk_malloc *vk_malloc_create(struct vk_ctx *vk) { struct vk_malloc *ma = pl_zalloc_ptr(NULL, ma); pl_mutex_init(&ma->lock); vk->GetPhysicalDeviceMemoryProperties(vk->physd, &ma->props); ma->vk = vk; vk_malloc_print_stats(ma, PL_LOG_INFO); return ma; } void vk_malloc_destroy(struct vk_malloc **ma_ptr) { struct vk_malloc *ma = *ma_ptr; if (!ma) return; for (int i = 0; i < ma->pools.num; i++) pool_uninit(ma->vk, &ma->pools.elem[i]); 
pl_mutex_destroy(&ma->lock); pl_free_ptr(ma_ptr); } void vk_malloc_garbage_collect(struct vk_malloc *ma) { struct vk_ctx *vk = ma->vk; pl_mutex_lock(&ma->lock); ma->age++; for (int i = 0; i < ma->pools.num; i++) { struct vk_pool *pool = &ma->pools.elem[i]; for (int n = 0; n < pool->slabs.num; n++) { struct vk_slab *slab = pool->slabs.elem[n]; pl_mutex_lock(&slab->lock); if (slab->used || (ma->age - slab->age) <= MAXIMUM_SLAB_AGE) { pl_mutex_unlock(&slab->lock); continue; } PL_DEBUG(vk, "Garbage collected slab of size %s from pool %d", PRINT_SIZE(slab->size), pool->index); pl_mutex_unlock(&slab->lock); slab_free(ma->vk, slab); PL_ARRAY_REMOVE_AT(pool->slabs, n--); } } pl_mutex_unlock(&ma->lock); } pl_handle_caps vk_malloc_handle_caps(const struct vk_malloc *ma, bool import) { struct vk_ctx *vk = ma->vk; pl_handle_caps caps = 0; for (int i = 0; vk_mem_handle_list[i]; i++) { // Try seeing if we could allocate a "basic" buffer using these // capabilities, with no fancy buffer usage. More specific checks will // happen down the line at VkBuffer creation time, but this should give // us a rough idea of what the driver supports. enum pl_handle_type type = vk_mem_handle_list[i]; if (buf_external_check(vk, VK_BUFFER_USAGE_TRANSFER_DST_BIT, type, import)) caps |= type; } return caps; } void vk_malloc_free(struct vk_malloc *ma, struct vk_memslice *slice) { struct vk_ctx *vk = ma->vk; struct vk_slab *slab = slice->priv; if (!slab || slab->dedicated) { slab_free(vk, slab); goto done; } pl_mutex_lock(&slab->lock); int page_idx = slice->offset / slab->pagesize; slab->spacemap |= 0x1LLU << page_idx; slab->used -= slice->size; slab->age = ma->age; pl_assert(slab->used >= 0); pl_mutex_unlock(&slab->lock); done: *slice = (struct vk_memslice) {0}; } static inline bool pool_params_eq(const struct vk_malloc_params *a, const struct vk_malloc_params *b) { return a->reqs.size == b->reqs.size && a->reqs.alignment == b->reqs.alignment && a->reqs.memoryTypeBits == b->reqs.memoryTypeBits && a->required == b->required && a->optimal == b->optimal && a->buf_usage == b->buf_usage && a->export_handle == b->export_handle; } static struct vk_pool *find_pool(struct vk_malloc *ma, const struct vk_malloc_params *params) { pl_assert(!params->import_handle); pl_assert(!params->ded_image); struct vk_malloc_params fixed = *params; fixed.reqs.alignment = 0; fixed.reqs.size = 0; fixed.shared_mem = (struct pl_shared_mem) {0}; for (int i = 0; i < ma->pools.num; i++) { if (pool_params_eq(&ma->pools.elem[i].params, &fixed)) return &ma->pools.elem[i]; } // Not found => add it PL_ARRAY_GROW(ma, ma->pools); size_t idx = ma->pools.num++; ma->pools.elem[idx] = (struct vk_pool) { .params = fixed, .index = idx, }; return &ma->pools.elem[idx]; } // Returns a suitable memory page from the pool. A new slab will be allocated // under the hood, if necessary. 
// // Note: This locks the slab it returns static struct vk_slab *pool_get_page(struct vk_malloc *ma, struct vk_pool *pool, size_t size, size_t align, VkDeviceSize *offset) { struct vk_slab *slab = NULL; int slab_pages = MINIMUM_PAGE_COUNT; size = PL_ALIGN2(size, PAGE_SIZE_ALIGN); const size_t pagesize = PL_ALIGN(size, align); for (int i = 0; i < pool->slabs.num; i++) { slab = pool->slabs.elem[i]; if (slab->pagesize < size) continue; if (slab->pagesize > pagesize * MINIMUM_PAGE_COUNT) // rough heuristic continue; if (slab->pagesize % align) continue; pl_mutex_lock(&slab->lock); int page_idx = __builtin_ffsll(slab->spacemap); if (!page_idx--) { pl_mutex_unlock(&slab->lock); // Increase the number of slabs to allocate for new slabs the // more existing full slabs exist for this size range slab_pages = PL_MIN(slab_pages << 1, MAXIMUM_PAGE_COUNT); continue; } slab->spacemap ^= 0x1LLU << page_idx; pl_mutex_unlock(&slab->lock); *offset = page_idx * slab->pagesize; return slab; } // Otherwise, allocate a new vk_slab and append it to the list. VkDeviceSize slab_size = slab_pages * pagesize; pl_static_assert(MINIMUM_SLAB_SIZE <= PAGE_SIZE_ALIGN * MAXIMUM_PAGE_COUNT); slab_size = PL_CLAMP(slab_size, MINIMUM_SLAB_SIZE, MAXIMUM_SLAB_SIZE); slab_pages = slab_size / pagesize; struct vk_malloc_params params = pool->params; params.reqs.size = slab_size; // Don't hold the lock while allocating the slab, because it can be a // potentially very costly operation. pl_mutex_unlock(&ma->lock); slab = slab_alloc(ma, ¶ms); pl_mutex_lock(&ma->lock); if (!slab) return NULL; pl_mutex_lock(&slab->lock); slab->spacemap = (slab_pages == sizeof(uint64_t) * 8) ? ~0LLU : ~(~0LLU << slab_pages); slab->pagesize = pagesize; PL_ARRAY_APPEND(NULL, pool->slabs, slab); // Return the first page in this newly allocated slab slab->spacemap ^= 0x1; *offset = 0; return slab; } static bool vk_malloc_import(struct vk_malloc *ma, struct vk_memslice *out, const struct vk_malloc_params *params) { struct vk_ctx *vk = ma->vk; VkExternalMemoryHandleTypeFlagBitsKHR vk_handle_type; vk_handle_type = vk_mem_handle_type(params->import_handle); struct vk_slab *slab = NULL; const struct pl_shared_mem *shmem = ¶ms->shared_mem; VkMemoryDedicatedAllocateInfoKHR dinfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, .image = params->ded_image, }; VkImportMemoryFdInfoKHR fdinfo = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, .handleType = vk_handle_type, .fd = -1, }; VkImportMemoryHostPointerInfoEXT ptrinfo = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, .handleType = vk_handle_type, }; VkMemoryAllocateInfo ainfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = shmem->size, }; if (params->ded_image) vk_link_struct(&ainfo, &dinfo); VkBuffer buffer = VK_NULL_HANDLE; VkMemoryRequirements reqs = params->reqs; if (params->buf_usage) { uint32_t qfs[3] = {0}; for (int i = 0; i < vk->pools.num; i++) qfs[i] = vk->pools.elem[i]->qf; VkExternalMemoryBufferCreateInfoKHR ext_buf_info = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR, .handleTypes = vk_handle_type, }; VkBufferCreateInfo binfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = &ext_buf_info, .size = shmem->size, .usage = params->buf_usage, .sharingMode = vk->pools.num > 1 ? 
VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = vk->pools.num, .pQueueFamilyIndices = qfs, }; VK(vk->CreateBuffer(vk->dev, &binfo, PL_VK_ALLOC, &buffer)); PL_VK_NAME(BUFFER, buffer, "imported"); vk->GetBufferMemoryRequirements(vk->dev, buffer, &reqs); } if (reqs.size > shmem->size) { PL_ERR(vk, "Imported object requires %zu bytes, larger than the " "provided size %zu!", (size_t) reqs.size, shmem->size); goto error; } if (shmem->offset % reqs.alignment || shmem->offset % params->reqs.alignment) { PL_ERR(vk, "Imported object offset %zu conflicts with alignment %zu!", shmem->offset, pl_lcm(reqs.alignment, params->reqs.alignment)); goto error; } switch (params->import_handle) { #ifdef PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: { if (!vk->GetMemoryFdPropertiesKHR) { PL_ERR(vk, "Importing PL_HANDLE_DMA_BUF requires %s.", VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME); goto error; } VkMemoryFdPropertiesKHR fdprops = { .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, }; VK(vk->GetMemoryFdPropertiesKHR(vk->dev, vk_handle_type, shmem->handle.fd, &fdprops)); // We dup() the fd to make it safe to import the same original fd // multiple times. fdinfo.fd = dup(shmem->handle.fd); if (fdinfo.fd == -1) { PL_ERR(vk, "Failed to dup() fd (%d) when importing memory: %s", fdinfo.fd, strerror(errno)); goto error; } reqs.memoryTypeBits &= fdprops.memoryTypeBits; vk_link_struct(&ainfo, &fdinfo); break; } #else // !PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: PL_ERR(vk, "PL_HANDLE_DMA_BUF requires building with UNIX support!"); goto error; #endif case PL_HANDLE_HOST_PTR: { VkMemoryHostPointerPropertiesEXT ptrprops = { .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, }; VK(vk->GetMemoryHostPointerPropertiesEXT(vk->dev, vk_handle_type, shmem->handle.ptr, &ptrprops)); ptrinfo.pHostPointer = (void *) shmem->handle.ptr; reqs.memoryTypeBits &= ptrprops.memoryTypeBits; vk_link_struct(&ainfo, &ptrinfo); break; } case PL_HANDLE_FD: case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: PL_ERR(vk, "vk_malloc_import: unsupported handle type %d", params->import_handle); goto error; } pl_assert(ainfo.pNext); if (!find_best_memtype(ma, reqs.memoryTypeBits, params, &ainfo.memoryTypeIndex)) { PL_ERR(vk, "No compatible memory types offered for imported memory!"); goto error; } VkDeviceMemory vkmem = NULL; VK(vk->AllocateMemory(vk->dev, &ainfo, PL_VK_ALLOC, &vkmem)); slab = pl_alloc_ptr(NULL, slab); *slab = (struct vk_slab) { .mem = vkmem, .dedicated = true, .imported = true, .buffer = buffer, .size = shmem->size, .handle_type = params->import_handle, }; pl_mutex_init(&slab->lock); *out = (struct vk_memslice) { .vkmem = vkmem, .buf = buffer, .size = shmem->size - shmem->offset, .offset = shmem->offset, .shared_mem = *shmem, .priv = slab, }; switch (params->import_handle) { case PL_HANDLE_DMA_BUF: case PL_HANDLE_FD: PL_TRACE(vk, "Imported %s bytes from fd: %d%s", PRINT_SIZE(slab->size), shmem->handle.fd, params->ded_image ? " (dedicated)" : ""); // fd ownership is transferred at this point. slab->handle.fd = fdinfo.fd; fdinfo.fd = -1; break; case PL_HANDLE_HOST_PTR: PL_TRACE(vk, "Imported %s bytes from ptr: %p%s", PRINT_SIZE(slab->size), shmem->handle.ptr, params->ded_image ? 
" (dedicated" : ""); slab->handle.ptr = ptrinfo.pHostPointer; break; case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: break; } VkMemoryPropertyFlags flags = ma->props.memoryTypes[ainfo.memoryTypeIndex].propertyFlags; if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { VK(vk->MapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data)); slab->coherent = flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; out->data = (uint8_t *) slab->data + out->offset; out->coherent = slab->coherent; if (!slab->coherent) { // Mapping does not implicitly invalidate mapped memory VK(vk->InvalidateMappedMemoryRanges(vk->dev, 1, &(VkMappedMemoryRange) { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = slab->mem, .size = VK_WHOLE_SIZE, })); } } if (buffer) VK(vk->BindBufferMemory(vk->dev, buffer, vkmem, 0)); return true; error: vk->DestroyBuffer(vk->dev, buffer, PL_VK_ALLOC); #ifdef PL_HAVE_UNIX if (fdinfo.fd > -1) close(fdinfo.fd); #endif pl_free(slab); *out = (struct vk_memslice) {0}; return false; } bool vk_malloc_slice(struct vk_malloc *ma, struct vk_memslice *out, const struct vk_malloc_params *params) { struct vk_ctx *vk = ma->vk; pl_assert(!params->import_handle || !params->export_handle); if (params->import_handle) return vk_malloc_import(ma, out, params); pl_assert(params->reqs.size); size_t size = params->reqs.size; size_t align = params->reqs.alignment; align = pl_lcm(align, vk->limits.bufferImageGranularity); align = pl_lcm(align, vk->limits.nonCoherentAtomSize); struct vk_slab *slab; VkDeviceSize offset; if (params->ded_image || size > MAXIMUM_PAGE_SIZE) { slab = slab_alloc(ma, params); if (!slab) return false; slab->dedicated = true; offset = 0; } else { pl_mutex_lock(&ma->lock); struct vk_pool *pool = find_pool(ma, params); slab = pool_get_page(ma, pool, size, align, &offset); pl_mutex_unlock(&ma->lock); if (!slab) { PL_ERR(ma->vk, "No slab to serve request for %s bytes (with " "alignment 0x%zx) in pool %d!", PRINT_SIZE(size), align, pool->index); return false; } // For accounting, just treat the alignment as part of the used size. // Doing it this way makes sure that the sizes reported to vk_memslice // consumers are always aligned properly. size = PL_ALIGN(size, align); slab->used += size; slab->age = ma->age; pl_mutex_unlock(&slab->lock); } pl_assert(offset % align == 0); *out = (struct vk_memslice) { .vkmem = slab->mem, .offset = offset, .size = size, .buf = slab->buffer, .data = slab->data ? (uint8_t *) slab->data + offset : 0x0, .coherent = slab->coherent, .priv = slab, .shared_mem = { .handle = slab->handle, .offset = offset, .size = slab->size, }, }; return true; } libplacebo-v4.192.1/src/vulkan/malloc.h000066400000000000000000000046731417677245700177170ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // All memory allocated from a vk_malloc MUST be explicitly released by // the caller before vk_malloc_destroy is called. 
struct vk_malloc *vk_malloc_create(struct vk_ctx *vk); void vk_malloc_destroy(struct vk_malloc **ma); // Get the supported handle types for this malloc instance pl_handle_caps vk_malloc_handle_caps(const struct vk_malloc *ma, bool import); // Represents a single "slice" of generic (non-buffer) memory, plus some // metadata for accounting. This struct is essentially read-only. struct vk_memslice { VkDeviceMemory vkmem; VkDeviceSize offset; VkDeviceSize size; void *priv; // depending on the type/flags: struct pl_shared_mem shared_mem; VkBuffer buf; // associated buffer (when `buf_usage` is nonzero) void *data; // pointer to slice (for persistently mapped slices) bool coherent; // whether `data` is coherent }; struct vk_malloc_params { VkMemoryRequirements reqs; VkMemoryPropertyFlags required; VkMemoryPropertyFlags optimal; VkBufferUsageFlags buf_usage; VkImage ded_image; // for dedicated image allocations enum pl_handle_type export_handle; enum pl_handle_type import_handle; struct pl_shared_mem shared_mem; // for `import_handle` }; bool vk_malloc_slice(struct vk_malloc *ma, struct vk_memslice *out, const struct vk_malloc_params *params); void vk_malloc_free(struct vk_malloc *ma, struct vk_memslice *slice); // Clean up unused slabs. Call this roughly once per frame to reduce // memory pressure / memory leaks. void vk_malloc_garbage_collect(struct vk_malloc *ma); // For debugging purposes. Doesn't include dedicated slab allocations! void vk_malloc_print_stats(struct vk_malloc *ma, enum pl_log_level); libplacebo-v4.192.1/src/vulkan/swapchain.c000066400000000000000000000723741417677245700204230ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/

#include "common.h"
#include "command.h"
#include "formats.h"
#include "utils.h"
#include "gpu.h"
#include "swapchain.h"
#include "pl_thread.h"

struct sem_pair {
    VkSemaphore in;
    VkSemaphore out;
};

struct priv {
    pl_mutex lock;
    struct vk_ctx *vk;
    VkSurfaceKHR surf;
    PL_ARRAY(VkSurfaceFormatKHR) formats;

    // current swapchain and metadata:
    struct pl_vulkan_swapchain_params params;
    VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
    VkSwapchainKHR swapchain;
    VkSwapchainKHR old_swapchain;
    int cur_width, cur_height;
    int swapchain_depth;
    pl_rc_t frames_in_flight;   // number of frames currently queued
    bool suboptimal;            // true once VK_SUBOPTIMAL_KHR is returned
    bool needs_recreate;        // swapchain needs to be recreated
    struct pl_color_repr color_repr;
    struct pl_color_space color_space;
    struct pl_hdr_metadata hdr_metadata;

    // state of the images:
    PL_ARRAY(pl_tex) images;        // pl_tex wrappers for the VkImages
    PL_ARRAY(struct sem_pair) sems; // pool of semaphores used to synchronize images
    int idx_sems;                   // index of next free semaphore pair
    int last_imgidx;                // the image index last acquired (for submit)
};

static struct pl_sw_fns vulkan_swapchain;

static bool map_color_space(VkColorSpaceKHR space, struct pl_color_space *out)
{
    switch (space) {
    // Note: This is technically against the spec, but more often than not
    // it's the correct result since `SRGB_NONLINEAR` is just a catch-all
    // for any sort of typical SDR curve, which is better approximated by
    // `pl_color_space_monitor`.
    case VK_COLOR_SPACE_SRGB_NONLINEAR_KHR:
        *out = pl_color_space_monitor;
        return true;

#ifdef VK_EXT_swapchain_colorspace
    case VK_COLOR_SPACE_BT709_NONLINEAR_EXT:
        *out = pl_color_space_monitor;
        return true;
    case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT:
        *out = (struct pl_color_space) {
            .primaries = PL_COLOR_PRIM_DISPLAY_P3,
            .transfer = PL_COLOR_TRC_BT_1886,
        };
        return true;
    case VK_COLOR_SPACE_DCI_P3_LINEAR_EXT:
        *out = (struct pl_color_space) {
            .primaries = PL_COLOR_PRIM_DCI_P3,
            .transfer = PL_COLOR_TRC_LINEAR,
        };
        return true;
    case VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT:
        *out = (struct pl_color_space) {
            .primaries = PL_COLOR_PRIM_DCI_P3,
            .transfer = PL_COLOR_TRC_BT_1886,
        };
        return true;
    case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT:
    case VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT:
        // TODO
        return false;
    case VK_COLOR_SPACE_BT709_LINEAR_EXT:
        *out = (struct pl_color_space) {
            .primaries = PL_COLOR_PRIM_BT_709,
            .transfer = PL_COLOR_TRC_LINEAR,
        };
        return true;
    case VK_COLOR_SPACE_BT2020_LINEAR_EXT:
        *out = (struct pl_color_space) {
            .primaries = PL_COLOR_PRIM_BT_2020,
            .transfer = PL_COLOR_TRC_LINEAR,
        };
        return true;
    case VK_COLOR_SPACE_HDR10_ST2084_EXT:
        *out = (struct pl_color_space) {
            .primaries = PL_COLOR_PRIM_BT_2020,
            .transfer = PL_COLOR_TRC_PQ,
        };
        return true;
    case VK_COLOR_SPACE_DOLBYVISION_EXT:
        // Unlikely to ever be implemented
        return false;
    case VK_COLOR_SPACE_HDR10_HLG_EXT:
        *out = (struct pl_color_space) {
            .primaries = PL_COLOR_PRIM_BT_2020,
            .transfer = PL_COLOR_TRC_HLG,
        };
        return true;
    case VK_COLOR_SPACE_ADOBERGB_LINEAR_EXT:
        *out = (struct pl_color_space) {
            .primaries = PL_COLOR_PRIM_ADOBE,
            .transfer = PL_COLOR_TRC_LINEAR,
        };
        return true;
    case VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT:
        *out = (struct pl_color_space) {
            .primaries = PL_COLOR_PRIM_ADOBE,
            .transfer = PL_COLOR_TRC_GAMMA22,
        };
        return true;
    case VK_COLOR_SPACE_PASS_THROUGH_EXT:
        *out = pl_color_space_unknown;
        return true;
#endif

#ifdef VK_AMD_display_native_hdr
    case VK_COLOR_SPACE_DISPLAY_NATIVE_AMD:
        // TODO
        return false;
#endif

    default: return false;
    }
}

static
bool pick_surf_format(pl_swapchain sw, const struct pl_color_space *hint) { struct priv *p = PL_PRIV(sw); struct vk_ctx *vk = p->vk; pl_gpu gpu = sw->gpu; int best_score = 0, best_id; bool wide_gamut = pl_color_primaries_is_wide_gamut(hint->primaries); bool prefer_hdr = pl_color_transfer_is_hdr(hint->transfer); for (int i = 0; i < p->formats.num; i++) { // Color space / format whitelist struct pl_color_space space; if (!map_color_space(p->formats.elem[i].colorSpace, &space)) continue; switch (p->formats.elem[i].format) { // Only accept floating point formats for linear curves case VK_FORMAT_R16G16B16_SFLOAT: case VK_FORMAT_R16G16B16A16_SFLOAT: case VK_FORMAT_R32G32B32_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R64G64B64_SFLOAT: case VK_FORMAT_R64G64B64A64_SFLOAT: if (space.transfer == PL_COLOR_TRC_LINEAR) break; // accept continue; // Only accept 8 bit for non-HDR curves case VK_FORMAT_R8G8B8_UNORM: case VK_FORMAT_B8G8R8_UNORM: case VK_FORMAT_R8G8B8A8_UNORM: case VK_FORMAT_B8G8R8A8_UNORM: case VK_FORMAT_A8B8G8R8_UNORM_PACK32: if (!pl_color_transfer_is_hdr(space.transfer)) break; // accept continue; // Only accept 10 bit formats for non-linear curves case VK_FORMAT_A2R10G10B10_UNORM_PACK32: case VK_FORMAT_A2B10G10R10_UNORM_PACK32: if (space.transfer != PL_COLOR_TRC_LINEAR) break; // accept continue; // Accept 16-bit formats for everything case VK_FORMAT_R16G16B16_UNORM: case VK_FORMAT_R16G16B16A16_UNORM: break; // accept default: continue; } // Make sure we can wrap this format to a meaningful, valid pl_fmt for (int n = 0; n < gpu->num_formats; n++) { pl_fmt plfmt = gpu->formats[n]; const struct vk_format **pvkfmt = PL_PRIV(plfmt); if ((*pvkfmt)->tfmt != p->formats.elem[i].format) continue; enum pl_fmt_caps render_caps = 0; render_caps |= PL_FMT_CAP_RENDERABLE; render_caps |= PL_FMT_CAP_BLITTABLE; if ((plfmt->caps & render_caps) != render_caps) continue; // format valid, use it if it has a higher score int score = 0; for (int c = 0; c < 3; c++) score += plfmt->component_depth[c]; if (pl_color_primaries_is_wide_gamut(space.primaries) == wide_gamut) score += 1000; if (space.primaries == hint->primaries) score += 2000; if (pl_color_transfer_is_hdr(space.transfer) == prefer_hdr) score += 10000; if (space.transfer == hint->transfer) score += 20000; switch (plfmt->type) { case PL_FMT_UNKNOWN: break; case PL_FMT_UINT: break; case PL_FMT_SINT: break; case PL_FMT_UNORM: score += 500; break; case PL_FMT_SNORM: score += 400; break; case PL_FMT_FLOAT: score += 300; break; case PL_FMT_TYPE_COUNT: pl_unreachable(); }; if (score > best_score) { best_score = score; best_id = i; break; } } } if (!best_score) { PL_ERR(vk, "Failed picking any valid, renderable surface format!"); return false; } VkSurfaceFormatKHR new_sfmt = p->formats.elem[best_id]; if (p->protoInfo.imageFormat != new_sfmt.format || p->protoInfo.imageColorSpace != new_sfmt.colorSpace) { PL_INFO(vk, "Picked surface configuration %d: %s + %s", best_id, vk_fmt_name(new_sfmt.format), vk_csp_name(new_sfmt.colorSpace)); p->protoInfo.imageFormat = new_sfmt.format; p->protoInfo.imageColorSpace = new_sfmt.colorSpace; p->needs_recreate = true; } return true; } static void set_hdr_metadata(struct priv *p, const struct pl_hdr_metadata *metadata) { struct vk_ctx *vk = p->vk; if (!vk->SetHdrMetadataEXT) return; // Ignore no-op changes if (pl_hdr_metadata_equal(metadata, &p->hdr_metadata)) return; // Remember the metadata so we can re-apply it after swapchain recreation p->hdr_metadata = *metadata; // Ignore HDR metadata requests for SDR 
    // swapchains
    if (!pl_color_transfer_is_hdr(p->color_space.transfer))
        return;
    if (!p->swapchain)
        return;

    vk->SetHdrMetadataEXT(vk->dev, 1, &p->swapchain, &(VkHdrMetadataEXT) {
        .sType = VK_STRUCTURE_TYPE_HDR_METADATA_EXT,
        .displayPrimaryRed   = { metadata->prim.red.x,   metadata->prim.red.y },
        .displayPrimaryGreen = { metadata->prim.green.x, metadata->prim.green.y },
        .displayPrimaryBlue  = { metadata->prim.blue.x,  metadata->prim.blue.y },
        .whitePoint = { metadata->prim.white.x, metadata->prim.white.y },
        .maxLuminance = metadata->max_luma,
        .minLuminance = metadata->min_luma,
        .maxContentLightLevel = metadata->max_cll,
        .maxFrameAverageLightLevel = metadata->max_fall,
    });
}

pl_swapchain pl_vulkan_create_swapchain(pl_vulkan plvk,
                                        const struct pl_vulkan_swapchain_params *params)
{
    struct vk_ctx *vk = PL_PRIV(plvk);
    pl_gpu gpu = plvk->gpu;

    if (!vk->CreateSwapchainKHR) {
        PL_ERR(gpu, VK_KHR_SWAPCHAIN_EXTENSION_NAME " not enabled!");
        return NULL;
    }

    struct pl_swapchain *sw = pl_zalloc_obj(NULL, sw, struct priv);
    sw->impl = &vulkan_swapchain;
    sw->log = vk->log;
    sw->ctx = sw->log;
    sw->gpu = gpu;

    struct priv *p = PL_PRIV(sw);
    pl_mutex_init(&p->lock);
    p->params = *params;
    p->vk = vk;
    p->surf = params->surface;
    p->swapchain_depth = PL_DEF(params->swapchain_depth, 3);
    pl_assert(p->swapchain_depth > 0);
    atomic_init(&p->frames_in_flight, 0);
    p->last_imgidx = -1;
    p->protoInfo = (VkSwapchainCreateInfoKHR) {
        .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
        .surface = p->surf,
        .imageArrayLayers = 1, // non-stereoscopic
        .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .minImageCount = p->swapchain_depth + 1, // +1 for the FB
        .presentMode = params->present_mode,
        .clipped = true,
    };

    // These fields will be updated by `vk_sw_recreate`
    p->color_space = pl_color_space_unknown;
    p->color_repr = (struct pl_color_repr) {
        .sys = PL_COLOR_SYSTEM_RGB,
        .levels = PL_COLOR_LEVELS_FULL,
        .alpha = PL_ALPHA_UNKNOWN,
    };

    // Make sure the swapchain present mode is supported
    VkPresentModeKHR *modes = NULL;
    uint32_t num_modes = 0;
    VK(vk->GetPhysicalDeviceSurfacePresentModesKHR(vk->physd, p->surf, &num_modes, NULL));
    modes = pl_calloc_ptr(NULL, num_modes, modes);
    VK(vk->GetPhysicalDeviceSurfacePresentModesKHR(vk->physd, p->surf, &num_modes, modes));

    bool supported = false;
    for (int i = 0; i < num_modes; i++)
        supported |= (modes[i] == p->protoInfo.presentMode);
    pl_free_ptr(&modes);

    if (!supported) {
        PL_WARN(vk, "Requested swap mode unsupported by this device, falling "
                "back to VK_PRESENT_MODE_FIFO_KHR");
        p->protoInfo.presentMode = VK_PRESENT_MODE_FIFO_KHR;
    }

    // Enumerate the supported surface color spaces
    uint32_t num_formats = 0;
    VK(vk->GetPhysicalDeviceSurfaceFormatsKHR(vk->physd, p->surf, &num_formats, NULL));
    PL_ARRAY_RESIZE(sw, p->formats, num_formats);
    VK(vk->GetPhysicalDeviceSurfaceFormatsKHR(vk->physd, p->surf, &num_formats, p->formats.elem));
    p->formats.num = num_formats;

    PL_INFO(gpu, "Available surface configurations:");
    for (int i = 0; i < p->formats.num; i++) {
        PL_INFO(gpu, "    %d: %-40s %s", i,
                vk_fmt_name(p->formats.elem[i].format),
                vk_csp_name(p->formats.elem[i].colorSpace));
    }

    struct pl_color_space hint = {0};
    if (params->prefer_hdr) {
        hint.primaries = PL_COLOR_PRIM_BT_2020;
        hint.transfer = PL_COLOR_TRC_PQ;
        hint.hdr = pl_hdr_metadata_hdr10;
    }

    // Ensure there exists at least some valid renderable surface format
    if (!pick_surf_format(sw, &hint))
        goto error;

    return sw;

error:
    pl_free(modes);
    pl_free(sw);
    return NULL;
}

static void vk_sw_destroy(pl_swapchain sw)
{
    pl_gpu gpu = sw->gpu;
    struct priv *p = PL_PRIV(sw);
    struct vk_ctx
*vk = p->vk; pl_gpu_flush(gpu); vk_wait_idle(vk); for (int i = 0; i < p->images.num; i++) pl_tex_destroy(gpu, &p->images.elem[i]); for (int i = 0; i < p->sems.num; i++) { vk->DestroySemaphore(vk->dev, p->sems.elem[i].in, PL_VK_ALLOC); vk->DestroySemaphore(vk->dev, p->sems.elem[i].out, PL_VK_ALLOC); } vk->DestroySwapchainKHR(vk->dev, p->swapchain, PL_VK_ALLOC); pl_mutex_destroy(&p->lock); pl_free((void *) sw); } static int vk_sw_latency(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); return p->swapchain_depth; } static bool update_swapchain_info(struct priv *p, VkSwapchainCreateInfoKHR *info, int w, int h) { struct vk_ctx *vk = p->vk; // Query the supported capabilities and update this struct as needed VkSurfaceCapabilitiesKHR caps = {0}; VK(vk->GetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, p->surf, &caps)); // Check for hidden/invisible window if (!caps.currentExtent.width || !caps.currentExtent.height) { PL_DEBUG(vk, "maxImageExtent reported as 0x0, hidden window? skipping"); return false; } // Sorted by preference static const struct { VkCompositeAlphaFlagsKHR vk_mode; enum pl_alpha_mode pl_mode; } alphaModes[] = { {VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, PL_ALPHA_INDEPENDENT}, {VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, PL_ALPHA_PREMULTIPLIED}, {VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, PL_ALPHA_UNKNOWN}, {VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR, PL_ALPHA_UNKNOWN}, }; for (int i = 0; i < PL_ARRAY_SIZE(alphaModes); i++) { if (caps.supportedCompositeAlpha & alphaModes[i].vk_mode) { info->compositeAlpha = alphaModes[i].vk_mode; p->color_repr.alpha = alphaModes[i].pl_mode; PL_DEBUG(vk, "Requested alpha compositing mode: %s", vk_alpha_mode(info->compositeAlpha)); break; } } if (!info->compositeAlpha) { PL_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)", caps.supportedCompositeAlpha); goto error; } // Note: We could probably also allow picking a surface transform that // flips the framebuffer and set `pl_swapchain_frame.flipped`, but this // doesn't appear to be necessary for any vulkan implementations. 
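    //
    // The transform list below works the same way as the composite alpha
    // list above: both are ordered by preference, the first entry that the
    // surface reports as supported is chosen, and finding none at all is
    // treated as a hard error.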
static const VkSurfaceTransformFlagsKHR rotModes[] = { VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR, VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR, }; for (int i = 0; i < PL_ARRAY_SIZE(rotModes); i++) { if (caps.supportedTransforms & rotModes[i]) { info->preTransform = rotModes[i]; PL_DEBUG(vk, "Requested surface transform: %s", vk_surface_transform(info->preTransform)); break; } } if (!info->preTransform) { PL_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)", caps.supportedTransforms); goto error; } // Image count as required PL_DEBUG(vk, "Requested image count: %d (min %d max %d)", (int) info->minImageCount, (int) caps.minImageCount, (int) caps.maxImageCount); info->minImageCount = PL_MAX(info->minImageCount, caps.minImageCount); if (caps.maxImageCount) info->minImageCount = PL_MIN(info->minImageCount, caps.maxImageCount); PL_DEBUG(vk, "Requested image size: %dx%d (min %dx%d < cur %dx%d < max %dx%d)", w, h, caps.minImageExtent.width, caps.minImageExtent.height, caps.currentExtent.width, caps.currentExtent.height, caps.maxImageExtent.width, caps.maxImageExtent.height); // Default the requested size based on the reported extent if (caps.currentExtent.width != 0xFFFFFFFF) w = PL_DEF(w, caps.currentExtent.width); if (caps.currentExtent.height != 0xFFFFFFFF) h = PL_DEF(h, caps.currentExtent.height); // Otherwise, re-use the existing size if available w = PL_DEF(w, info->imageExtent.width); h = PL_DEF(h, info->imageExtent.height); if (!w || !h) { PL_ERR(vk, "Failed resizing swapchain: unknown size?"); goto error; } // Clamp the extent based on the supported limits w = PL_CLAMP(w, caps.minImageExtent.width, caps.maxImageExtent.width); h = PL_CLAMP(h, caps.minImageExtent.height, caps.maxImageExtent.height); info->imageExtent = (VkExtent2D) { w, h }; // We just request whatever makes sense, and let the pl_vk decide what // pl_tex_params that translates to. 
That said, we still need to intersect // the swapchain usage flags with the format usage flags VkImageUsageFlags req_flags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; VkImageUsageFlags opt_flags = VK_IMAGE_USAGE_STORAGE_BIT; info->imageUsage = caps.supportedUsageFlags & (req_flags | opt_flags); VkFormatProperties fmtprop = {0}; vk->GetPhysicalDeviceFormatProperties(vk->physd, info->imageFormat, &fmtprop); #define CHECK(usage, feature) \ if (!((fmtprop.optimalTilingFeatures & VK_FORMAT_FEATURE_##feature##_BIT))) \ info->imageUsage &= ~VK_IMAGE_USAGE_##usage##_BIT CHECK(COLOR_ATTACHMENT, COLOR_ATTACHMENT); CHECK(TRANSFER_DST, TRANSFER_DST); CHECK(STORAGE, STORAGE_IMAGE); if ((info->imageUsage & req_flags) != req_flags) { PL_ERR(vk, "The swapchain doesn't support rendering and blitting!"); goto error; } return true; error: return false; } static void destroy_swapchain(struct vk_ctx *vk, struct priv *p) { assert(p->old_swapchain); vk->DestroySwapchainKHR(vk->dev, p->old_swapchain, PL_VK_ALLOC); p->old_swapchain = VK_NULL_HANDLE; } static bool vk_sw_recreate(pl_swapchain sw, int w, int h) { pl_gpu gpu = sw->gpu; struct priv *p = PL_PRIV(sw); struct vk_ctx *vk = p->vk; VkImage *vkimages = NULL; uint32_t num_images = 0; // It's invalid to trigger another swapchain recreation while there's more // than one swapchain already active, so we need to flush any pending // asynchronous swapchain release operations that may be ongoing while (p->old_swapchain) vk_poll_commands(vk, UINT64_MAX); VkSwapchainCreateInfoKHR sinfo = p->protoInfo; sinfo.oldSwapchain = p->swapchain; if (!update_swapchain_info(p, &sinfo, w, h)) return false; PL_INFO(sw, "(Re)creating swapchain of size %dx%d", sinfo.imageExtent.width, sinfo.imageExtent.height); VK(vk->CreateSwapchainKHR(vk->dev, &sinfo, PL_VK_ALLOC, &p->swapchain)); p->suboptimal = false; p->needs_recreate = false; p->cur_width = sinfo.imageExtent.width; p->cur_height = sinfo.imageExtent.height; // Freeing the old swapchain while it's still in use is an error, so do it // asynchronously once the device is idle if (sinfo.oldSwapchain) { p->old_swapchain = sinfo.oldSwapchain; vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p); } // Get the new swapchain images VK(vk->GetSwapchainImagesKHR(vk->dev, p->swapchain, &num_images, NULL)); vkimages = pl_calloc_ptr(NULL, num_images, vkimages); VK(vk->GetSwapchainImagesKHR(vk->dev, p->swapchain, &num_images, vkimages)); for (int i = 0; i < num_images; i++) PL_VK_NAME(IMAGE, vkimages[i], "swapchain"); // If needed, allocate some more semaphores while (num_images > p->sems.num) { VkSemaphore sem_in = NULL, sem_out = NULL; static const VkSemaphoreCreateInfo seminfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, }; VK(vk->CreateSemaphore(vk->dev, &seminfo, PL_VK_ALLOC, &sem_in)); VK(vk->CreateSemaphore(vk->dev, &seminfo, PL_VK_ALLOC, &sem_out)); PL_VK_NAME(SEMAPHORE, sem_in, "swapchain in"); PL_VK_NAME(SEMAPHORE, sem_out, "swapchain out"); PL_ARRAY_APPEND(sw, p->sems, (struct sem_pair) { .in = sem_in, .out = sem_out, }); } // Recreate the pl_tex wrappers for (int i = 0; i < p->images.num; i++) pl_tex_destroy(gpu, &p->images.elem[i]); p->images.num = 0; for (int i = 0; i < num_images; i++) { const VkExtent2D *ext = &sinfo.imageExtent; pl_tex tex = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params( .image = vkimages[i], .width = ext->width, .height = ext->height, .format = sinfo.imageFormat, .usage = sinfo.imageUsage, )); if (!tex) goto error; PL_ARRAY_APPEND(sw, p->images, tex); } pl_assert(num_images > 
0); int bits = 0; // The channel with the most bits is probably the most authoritative about // the actual color information (consider e.g. a2bgr10). Slight downside // in that it results in rounding r/b for e.g. rgb565, but we don't pick // surfaces with fewer than 8 bits anyway, so let's not care for now. pl_fmt fmt = p->images.elem[0]->params.format; for (int i = 0; i < fmt->num_components; i++) bits = PL_MAX(bits, fmt->component_depth[i]); p->color_repr.bits.sample_depth = bits; p->color_repr.bits.color_depth = bits; // FIXME: infer `p->color_space.sig_peak` etc. from HDR metadata? map_color_space(sinfo.imageColorSpace, &p->color_space); // Forcibly re-apply HDR metadata, bypassing the no-op check struct pl_hdr_metadata metadata = p->hdr_metadata; p->hdr_metadata = pl_hdr_metadata_empty; set_hdr_metadata(p, &metadata); pl_free(vkimages); return true; error: PL_ERR(vk, "Failed (re)creating swapchain!"); pl_free(vkimages); if (p->swapchain != sinfo.oldSwapchain) { vk->DestroySwapchainKHR(vk->dev, p->swapchain, PL_VK_ALLOC); p->swapchain = VK_NULL_HANDLE; p->cur_width = p->cur_height = 0; p->suboptimal = false; p->needs_recreate = false; } return false; } static bool vk_sw_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame) { struct priv *p = PL_PRIV(sw); struct vk_ctx *vk = p->vk; pl_mutex_lock(&p->lock); bool recreate = !p->swapchain || p->needs_recreate; if (p->suboptimal && !p->params.allow_suboptimal) recreate = true; if (recreate && !vk_sw_recreate(sw, 0, 0)) { pl_mutex_unlock(&p->lock); return false; } VkSemaphore sem_in = p->sems.elem[p->idx_sems].in; PL_TRACE(vk, "vkAcquireNextImageKHR signals %p", (void *) sem_in); for (int attempts = 0; attempts < 2; attempts++) { uint32_t imgidx = 0; VkResult res = vk->AcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX, sem_in, VK_NULL_HANDLE, &imgidx); switch (res) { case VK_SUBOPTIMAL_KHR: p->suboptimal = true; // fall through case VK_SUCCESS: p->last_imgidx = imgidx; pl_vulkan_release(sw->gpu, p->images.elem[imgidx], VK_IMAGE_LAYOUT_UNDEFINED, (pl_vulkan_sem){ sem_in }); *out_frame = (struct pl_swapchain_frame) { .fbo = p->images.elem[imgidx], .flipped = false, .color_repr = p->color_repr, .color_space = p->color_space, }; // keep lock held return true; case VK_ERROR_OUT_OF_DATE_KHR: { // In these cases try recreating the swapchain if (!vk_sw_recreate(sw, 0, 0)) { pl_mutex_unlock(&p->lock); return false; } continue; } default: PL_ERR(vk, "Failed acquiring swapchain image: %s", vk_res_str(res)); pl_mutex_unlock(&p->lock); return false; } } // If we've exhausted the number of attempts to recreate the swapchain, // just give up silently and let the user retry some time later. 
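    //
    // Illustrative caller-side sketch (hypothetical application code, not
    // part of this file) of what "retry later" amounts to in practice:
    //
    //     struct pl_swapchain_frame frame;
    //     if (!pl_swapchain_start_frame(sw, &frame)) {
    //         // swapchain busy/hidden: skip this redraw and try again on
    //         // the next vsync
    //         return;
    //     }
    //     // ... render to frame.fbo ...
    //     pl_swapchain_submit_frame(sw);
    //     pl_swapchain_swap_buffers(sw);
    //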
pl_mutex_unlock(&p->lock); return false; } static void present_cb(struct priv *p, void *arg) { (void) pl_rc_deref(&p->frames_in_flight); } static bool vk_sw_submit_frame(pl_swapchain sw) { pl_gpu gpu = sw->gpu; struct priv *p = PL_PRIV(sw); struct vk_ctx *vk = p->vk; pl_assert(p->last_imgidx >= 0); pl_assert(p->swapchain); uint32_t idx = p->last_imgidx; VkSemaphore sem_out = p->sems.elem[p->idx_sems++].out; p->idx_sems %= p->sems.num; p->last_imgidx = -1; bool held = pl_vulkan_hold(gpu, p->images.elem[idx], VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, (pl_vulkan_sem){ sem_out }); if (!held) { PL_ERR(gpu, "Failed holding swapchain image for presentation"); pl_mutex_unlock(&p->lock); return false; } struct vk_cmd *cmd = pl_vk_steal_cmd(gpu); if (!cmd) { pl_mutex_unlock(&p->lock); return false; } pl_rc_ref(&p->frames_in_flight); vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL); if (!vk_cmd_submit(vk, &cmd)) { pl_mutex_unlock(&p->lock); return false; } struct vk_cmdpool *pool = vk->pool_graphics; VkQueue queue = pool->queues[pool->idx_queues]; vk_rotate_queues(p->vk); vk_malloc_garbage_collect(vk->ma); VkPresentInfoKHR pinfo = { .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .waitSemaphoreCount = 1, .pWaitSemaphores = &sem_out, .swapchainCount = 1, .pSwapchains = &p->swapchain, .pImageIndices = &idx, }; PL_TRACE(vk, "vkQueuePresentKHR waits on %p", (void *) sem_out); VkResult res = vk->QueuePresentKHR(queue, &pinfo); pl_mutex_unlock(&p->lock); switch (res) { case VK_SUBOPTIMAL_KHR: p->suboptimal = true; // fall through case VK_SUCCESS: return true; case VK_ERROR_OUT_OF_DATE_KHR: // We can silently ignore this error, since the next start_frame will // recreate the swapchain automatically. return true; default: PL_ERR(vk, "Failed presenting to queue %p: %s", (void *) queue, vk_res_str(res)); return false; } } static void vk_sw_swap_buffers(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); pl_mutex_lock(&p->lock); while (pl_rc_count(&p->frames_in_flight) >= p->swapchain_depth) { pl_mutex_unlock(&p->lock); // don't hold mutex while blocking vk_poll_commands(p->vk, UINT64_MAX); pl_mutex_lock(&p->lock); } pl_mutex_unlock(&p->lock); } static bool vk_sw_resize(pl_swapchain sw, int *width, int *height) { struct priv *p = PL_PRIV(sw); bool ok = true; pl_mutex_lock(&p->lock); bool width_changed = *width && *width != p->cur_width, height_changed = *height && *height != p->cur_height; if (p->suboptimal || p->needs_recreate || width_changed || height_changed) ok = vk_sw_recreate(sw, *width, *height); *width = p->cur_width; *height = p->cur_height; pl_mutex_unlock(&p->lock); return ok; } static void vk_sw_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp) { struct priv *p = PL_PRIV(sw); pl_mutex_lock(&p->lock); // This should never fail if the swapchain already exists bool ok = pick_surf_format(sw, csp); set_hdr_metadata(p, &csp->hdr); pl_assert(ok); pl_mutex_unlock(&p->lock); } bool pl_vulkan_swapchain_suboptimal(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); return p->suboptimal; } static struct pl_sw_fns vulkan_swapchain = { .destroy = vk_sw_destroy, .latency = vk_sw_latency, .resize = vk_sw_resize, .colorspace_hint = vk_sw_colorspace_hint, .start_frame = vk_sw_start_frame, .submit_frame = vk_sw_submit_frame, .swap_buffers = vk_sw_swap_buffers, }; libplacebo-v4.192.1/src/vulkan/utils.c000066400000000000000000000103721417677245700175740ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "utils.h" VkExternalMemoryHandleTypeFlagBitsKHR vk_mem_handle_type(enum pl_handle_type handle_type) { if (!handle_type) return 0; switch (handle_type) { case PL_HANDLE_FD: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; case PL_HANDLE_WIN32: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; case PL_HANDLE_WIN32_KMT: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR; case PL_HANDLE_DMA_BUF: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; case PL_HANDLE_HOST_PTR: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; } pl_unreachable(); } VkExternalSemaphoreHandleTypeFlagBitsKHR vk_sync_handle_type(enum pl_handle_type handle_type) { if (!handle_type) return 0; switch (handle_type) { case PL_HANDLE_FD: return VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; case PL_HANDLE_WIN32: return VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; case PL_HANDLE_WIN32_KMT: return VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR; case PL_HANDLE_DMA_BUF: case PL_HANDLE_HOST_PTR: return 0; } pl_unreachable(); } bool vk_external_mem_check(struct vk_ctx *vk, const VkExternalMemoryPropertiesKHR *props, enum pl_handle_type handle_type, bool import) { VkExternalMemoryFeatureFlagsKHR flags = props->externalMemoryFeatures; VkExternalMemoryHandleTypeFlagBitsKHR vk_handle = vk_mem_handle_type(handle_type); if (import) { if (!(flags & VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR)) { PL_DEBUG(vk, "Handle type %s (0x%x) is not importable", vk_handle_name(vk_handle), (unsigned int) handle_type); return false; } } else { if (!(flags & VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR)) { PL_DEBUG(vk, "Handle type %s (0x%x) is not exportable", vk_handle_name(vk_handle), (unsigned int) handle_type); return false; } } return true; } const enum pl_handle_type vk_mem_handle_list[] = { PL_HANDLE_HOST_PTR, #ifdef PL_HAVE_UNIX PL_HANDLE_FD, PL_HANDLE_DMA_BUF, #endif #ifdef PL_HAVE_WIN32 PL_HANDLE_WIN32, PL_HANDLE_WIN32_KMT, #endif 0 }; const enum pl_handle_type vk_sync_handle_list[] = { #ifdef PL_HAVE_UNIX PL_HANDLE_FD, #endif #ifdef PL_HAVE_WIN32 PL_HANDLE_WIN32, PL_HANDLE_WIN32_KMT, #endif 0 }; const void *vk_find_struct(const void *chain, VkStructureType stype) { const VkBaseInStructure *in = chain; while (in) { if (in->sType == stype) return in; in = in->pNext; } return NULL; } void vk_link_struct(void *chain, void *in) { if (!in) return; VkBaseOutStructure *out = chain; while (out->pNext) out = out->pNext; out->pNext = in; } void *vk_struct_memdup(void *alloc, const void *pin) { if (!pin) return NULL; const VkBaseInStructure *in = pin; size_t size = vk_struct_size(in->sType); if (!size) return NULL; VkBaseOutStructure *out = pl_memdup(alloc, in, size); out->pNext = NULL; return out; } void *vk_chain_memdup(void *alloc, const void *pin) { const VkBaseInStructure *in = pin; VkBaseOutStructure *out = vk_struct_memdup(alloc, in); if (!out) 
return NULL; out->pNext = vk_chain_memdup(alloc, in->pNext); return out; } libplacebo-v4.192.1/src/vulkan/utils.h000066400000000000000000000101461417677245700176000ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // Return a human-readable name for various vulkan enums const char *vk_res_str(VkResult res); const char *vk_fmt_name(VkFormat fmt); const char *vk_csp_name(VkColorSpaceKHR csp); const char *vk_handle_name(VkExternalMemoryHandleTypeFlagBitsKHR handle); const char *vk_obj_type(VkObjectType obj); const char *vk_alpha_mode(VkCompositeAlphaFlagsKHR alpha); const char *vk_surface_transform(VkSurfaceTransformFlagsKHR transform); // Return the size of an arbitrary vulkan struct. Returns 0 for unknown structs size_t vk_struct_size(VkStructureType stype); // Enum translation boilerplate VkExternalMemoryHandleTypeFlagBitsKHR vk_mem_handle_type(enum pl_handle_type); VkExternalSemaphoreHandleTypeFlagBitsKHR vk_sync_handle_type(enum pl_handle_type); // Bitmask of all access flags that imply a read/write operation, respectively extern const VkAccessFlags vk_access_read; extern const VkAccessFlags vk_access_write; // Check for compatibility of a VkExternalMemoryProperties bool vk_external_mem_check(struct vk_ctx *vk, const VkExternalMemoryPropertiesKHR *props, enum pl_handle_type handle_type, bool check_import); // Static lists of external handle types we should try probing for extern const enum pl_handle_type vk_mem_handle_list[]; extern const enum pl_handle_type vk_sync_handle_list[]; // Find a structure in a pNext chain, or NULL const void *vk_find_struct(const void *chain, VkStructureType stype); // Link a structure into a pNext chain void vk_link_struct(void *chain, void *in); // Make a copy of a structure, not including the pNext chain void *vk_struct_memdup(void *alloc, const void *in); // Make a deep copy of an entire pNext chain void *vk_chain_memdup(void *alloc, const void *in); // Convenience macros to simplify a lot of common boilerplate #define PL_VK_ASSERT(res, str) \ do { \ if (res != VK_SUCCESS) { \ PL_ERR(vk, str ": %s (%s:%d)", \ vk_res_str(res), __FILE__, __LINE__); \ goto error; \ } \ } while (0) #define VK(cmd) \ do { \ PL_TRACE(vk, #cmd); \ VkResult _res = (cmd); \ PL_VK_ASSERT(_res, #cmd); \ } while (0) #define PL_VK_NAME(type, obj, name) \ do { \ if (vk->SetDebugUtilsObjectNameEXT) { \ vk->SetDebugUtilsObjectNameEXT(vk->dev, &(VkDebugUtilsObjectNameInfoEXT) { \ .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, \ .objectType = VK_OBJECT_TYPE_##type, \ .objectHandle = (uint64_t) (obj), \ .pObjectName = (name), \ }); \ } \ } while (0) libplacebo-v4.192.1/src/vulkan/utils_gen.py000066400000000000000000000144761417677245700206440ustar00rootroot00000000000000#!/usr/bin/env python3 # # This file is part of libplacebo. 
# # libplacebo is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # libplacebo is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with libplacebo. If not, see . import os.path import sys import xml.etree.ElementTree as ET try: from mako.template import Template except ModuleNotFoundError: print('Module \'mako\' not found, please install \'python3-mako\' or ' 'an equivalent package on your system!', file=sys.stderr) sys.exit(1) TEMPLATE = Template(""" #define VK_ENABLE_BETA_EXTENSIONS #include "vulkan/utils.h" const char *vk_res_str(VkResult res) { switch (res) { %for res in vkresults: case ${res}: return "${res}"; %endfor default: return "unknown error"; } } const char *vk_fmt_name(VkFormat fmt) { switch (fmt) { %for fmt in vkformats: case ${fmt}: return "${fmt}"; %endfor default: return "unknown format"; } } const char *vk_csp_name(VkColorSpaceKHR csp) { switch (csp) { %for csp in vkspaces: case ${csp}: return "${csp}"; %endfor default: return "unknown color space"; } } const char *vk_handle_name(VkExternalMemoryHandleTypeFlagBitsKHR handle) { switch (handle) { %for handle in vkhandles: case ${handle}: return "${handle}"; %endfor default: return "unknown handle type"; } } const char *vk_alpha_mode(VkCompositeAlphaFlagsKHR alpha) { switch (alpha) { %for mode in vkalphas: case ${mode}: return "${mode}"; %endfor default: return "unknown alpha mode"; } } const char *vk_surface_transform(VkSurfaceTransformFlagsKHR tf) { switch (tf) { %for tf in vktransforms: case ${tf}: return "${tf}"; %endfor default: return "unknown surface transform"; } } const char *vk_obj_type(VkObjectType obj) { switch (obj) { %for obj in vkobjects: case ${obj.enum}: return "${obj.name}"; %endfor default: return "unknown object"; } } size_t vk_struct_size(VkStructureType stype) { switch (stype) { %for struct in vkstructs: case ${struct.stype}: return sizeof(${struct.name}); %endfor default: return 0; } } const VkAccessFlags vk_access_read = ${hex(vkaccess.read)}LLU; const VkAccessFlags vk_access_write = ${hex(vkaccess.write)}LLU; """) class Obj(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) def findall_enum(registry, name): for e in registry.iterfind('enums[@name="{0}"]/enum'.format(name)): if not 'alias' in e.attrib: yield e for e in registry.iterfind('.//enum[@extends="{0}"]'.format(name)): # ext 289 is a non-existing extension that defines some names for # proprietary downstream consumers, causes problems unless excluded if e.attrib.get('extnumber', '0') == '289': continue # some other extensions contain reserved identifiers that generally # translate to compile failures if 'RESERVED' in e.attrib['name']: continue if not 'alias' in e.attrib: yield e def get_vkenum(registry, enum): for e in findall_enum(registry, enum): yield e.attrib['name'] def get_vkobjects(registry): for t in registry.iterfind('types/type[@category="handle"]'): if 'objtypeenum' in t.attrib: yield Obj(enum = t.attrib['objtypeenum'], name = t.find('name').text) def get_vkstructs(registry): for e in registry.iterfind('types/type[@category="struct"]'): # Strings for 
platform-specific crap we want to blacklist as they will # most likely cause build failures blacklist_strs = [ 'ANDROID', 'Surface', 'Win32', 'D3D12', 'GGP', 'FUCHSIA', ] if any([ str in e.attrib['name'] for str in blacklist_strs ]): continue stype = None for m in e.iterfind('member'): if m.find('name').text == 'sType': stype = m break if stype and 'values' in stype.attrib: yield Obj(stype = stype.attrib['values'], name = e.attrib['name']) def get_vkaccess(registry): access = Obj(read = 0, write = 0) for e in findall_enum(registry, 'VkAccessFlagBits'): if '_READ_' in e.attrib['name']: access.read |= 1 << int(e.attrib['bitpos']) if '_WRITE_' in e.attrib['name']: access.write |= 1 << int(e.attrib['bitpos']) return access def find_registry_xml(): registry_paths = [ '%VULKAN_SDK%/share/vulkan/registry/vk.xml', '$VULKAN_SDK/share/vulkan/registry/vk.xml', '$MINGW_PREFIX/share/vulkan/registry/vk.xml', '/usr/share/vulkan/registry/vk.xml', ] for p in registry_paths: path = os.path.expandvars(p) if os.path.isfile(path): print('Found vk.xml: {0}'.format(path)) return path print('Could not find the vulkan registry (vk.xml), please specify its ' 'location manually using the -Dvulkan-registry=/path/to/vk.xml ' 'option!', file=sys.stderr) sys.exit(1) if __name__ == '__main__': assert len(sys.argv) == 3 xmlfile = sys.argv[1] outfile = sys.argv[2] if not xmlfile or xmlfile == '': xmlfile = find_registry_xml() registry = ET.parse(xmlfile) with open(outfile, 'w') as f: f.write(TEMPLATE.render( vkresults = get_vkenum(registry, 'VkResult'), vkformats = get_vkenum(registry, 'VkFormat'), vkspaces = get_vkenum(registry, 'VkColorSpaceKHR'), vkhandles = get_vkenum(registry, 'VkExternalMemoryHandleTypeFlagBits'), vkalphas = get_vkenum(registry, 'VkCompositeAlphaFlagBitsKHR'), vktransforms = get_vkenum(registry, 'VkSurfaceTransformFlagBitsKHR'), vkobjects = get_vkobjects(registry), vkstructs = get_vkstructs(registry), vkaccess = get_vkaccess(registry), ))
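
# Example invocation (sketch): the script takes exactly two positional
# arguments, the path to the Vulkan registry XML (may be an empty string to
# trigger the search in find_registry_xml) and the C file to generate, e.g.:
#
#   python3 utils_gen.py /usr/share/vulkan/registry/vk.xml utils_gen.c
#
# In practice this is normally driven by the build system rather than run
# by hand.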