pax_global_header00006660000000000000000000000064132402133250014505gustar00rootroot0000000000000052 comment=6c336a1df4bdccac653eee73622f268672eb2a60 libplacebo-0.4.0/000077500000000000000000000000001324021332500136025ustar00rootroot00000000000000libplacebo-0.4.0/.gitignore000066400000000000000000000000461324021332500155720ustar00rootroot00000000000000/demos/sdl2 /build* /tags /TAGS *.exe libplacebo-0.4.0/.travis.yml000066400000000000000000000012361324021332500157150ustar00rootroot00000000000000language: c os: linux dist: trusty sudo: false compiler: - clang - gcc branches: only: - master - ci - /^v[\d.]+$/ notifications: email: false irc: channels: - "irc.freenode.org#libplacebo" on_success: change on_failure: always script: - meson build -D tests=true - cat ./build/meson-logs/meson-log.txt - ninja -v -C build - ninja -v -C build test after_failure: cat ./build/meson-logs/testlog.txt install: - export PATH="`pwd`/bin:${PATH}" - pyenv global system 3.6 - wget https://github.com/ninja-build/ninja/releases/download/v1.7.2/ninja-linux.zip && unzip -q ninja-linux.zip -d bin - pip3 install meson libplacebo-0.4.0/LICENSE000066400000000000000000000614131324021332500146140ustar00rootroot00000000000000libplacebo's code base is heavily derived from mpv, which is a fork of mplayer2, which is a fork of MPlayer. Currently, this includes files inside the following directories: bstr/* osdep/* ta/* All of the affected MPlayer/mplayer2/mpv code has been successfully relicensed to LGPLv2.1+; which means that libplacebo as a whole is licensed under the GNU Lesser General Public License version 2.1 or later. All new contributions must be published under the same license. Note to contributors: This project will remain LGPLv2.1+, so any new code that is ported from mpv (or other sources) must be LGPLv2.1+ compatible, i.e. porting GPL, LGPLv2 (without the "or later") or LGPLv3+ code is not allowed and must be rewritten instead. Appended is a complete copy of the LGPLv2.1 license text. ------------------------------------------------------------------------------- GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. 
A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. 
You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. 
Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. 
However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. 
Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS libplacebo-0.4.0/README.md000066400000000000000000000244301324021332500150640ustar00rootroot00000000000000# libplacebo ![travis-ci badge](https://travis-ci.org/haasn/libplacebo.svg?branch=master) **libplacebo** is essentially the core rendering algorithms and ideas of [mpv](https://mpv.io) turned into a library. This grew out of an interest to accomplish the following goals: - Clean up mpv's internal [RA](#tier-1-rendering-abstraction) API and make it reusable for other projects. - Provide a standard library of useful GPU-accelerated image processing primitives based on GLSL, so projects like VLC or Firefox can use them without incurring a heavy dependency on `libmpv`. - Rewrite core parts of mpv's GPU-accelerated video renderer on top of redesigned abstractions. (Basically, I wanted to eliminate code smell like `shader_cache.c` and totally redesign `gpu/video.c`) **NOTE**: libplacebo is currently in an early stage. Expect the API to be unstable, and many parts to be missing. To reflect this, the API version as exported by common.h will **NOT** change except on new beta releases (v0.x). 
So using libplacebo directly from git master is not advised; always use one of
the tagged releases if you want to play around with libplacebo in its current
stage.

Once the version number hits 1.X, which will mark the first stable release, the
API version will be bumped for every change to the public API - even changes
that happen on git master.

## Authors

libplacebo's main developer is Niklas Haas ([@haasn](https://github.com/haasn)),
but the project would not be possible without the development of mpv, which was
done primarily by Vincent Lang ([@wm4](https://github.com/wm4)).

For a full list of past contributors to mpv, see the
[mpv authorship page](https://github.com/mpv-player/mpv/graphs/contributors).

### License

Since the code derives from several LGPLv2.1+-licensed parts of mpv, there's
little choice but to license libplacebo the same way. It's worth pointing out
that, apart from some minor exceptions (e.g. filters.c and colorspace.c), most
of the code is either original work or can be attributed to only a small number
of developers, so relicensing to a more permissive license might be possible in
principle.

## API Overview

The public API of libplacebo is currently split up into the following
components, the header files (and documentation) for which are available inside
the [`src/include/libplacebo`](src/include/libplacebo) directory.

The API is available in different "tiers", representing levels of abstraction
inside libplacebo. The APIs in higher tiers depend on those in lower tiers.
Which tier a user picks depends on how much power/control they want over the
actual rendering. The low-level tiers are more suitable for big projects that
need strong control over the entire rendering pipeline, whereas the high-level
tiers are more suitable for smaller or simpler projects that want libplacebo to
take care of everything.

### Tier 0 (context, raw math primitives)

- `colorspace.h`: A collection of enums and structs for describing color
  spaces, as well as a collection of helper functions for computing various
  color space transformation matrices.
- `common.h`: A collection of miscellaneous utility types and macros that are
  shared among multiple subsystems. Usually does not need to be included
  directly.
- `context.h`: The main entry point into the library. Controls memory
  allocation, logging, and guards ABI/thread safety.
- `config.h`: Macros defining information about the way libplacebo was built,
  including the version strings and compiled-in features/dependencies. Usually
  does not need to be included directly. May be useful for feature tests.
- `dither.h`: Some helper functions for generating various noise and dithering
  matrices. Might be useful for somebody else.
- `filters.h`: A collection of reusable reconstruction filter kernels, which
  can be used for scaling. The generated weights arrays are semi-tailored to
  the needs of libplacebo, but may be useful to somebody else regardless. Also
  contains the structs needed to define a filter kernel for the purposes of
  libplacebo's upscaling routines.

The API functions in this tier are either used throughout the program (context,
common etc.) or are low-level implementations of filter kernels, color space
conversion logic etc., which are entirely independent of GLSL and even the GPU
in general.

### Tier 1 (rendering abstraction)

- `gpu.h`: Exports the GPU abstraction API used by libplacebo internally.
- `swapchain.h`: Exports an API for wrapping platform-specific swapchains and
  other display APIs.
  This is the API used to actually queue up rendered frames for presentation
  (e.g. to a window or display device).
- `vulkan.h`: GPU API implementation based on Vulkan.

As part of the public API, libplacebo exports a middle-level abstraction for
dealing with GPU objects and state. Basically, this is the API libplacebo uses
internally to wrap OpenGL, Vulkan, Direct3D etc. into a single unifying API
subset that abstracts away state, messy details, synchronization etc. into a
fairly high-level API suitable for libplacebo's image processing tasks.

It's made public both because it constitutes part of the public API of various
image processing functions, and also in the hopes that it will be useful for
other developers of GPU-accelerated image processing software.

### Tier 2 (GLSL generating primitives)

- `shaders.h`: The low-level interface to shader generation. This can be used
  to generate GLSL stubs suitable for inclusion in other programs, as part of
  larger shaders. For example, a program might use this interface to generate
  a specialized tone-mapping function for performing color space conversions,
  then call that from its own fragment shader code. This abstraction has an
  optional dependency on `gpu.h`, but can also be used independently from it.

In addition to this low-level interface, there are several available shader
routines which libplacebo exports:

- `shaders/colorspace.h`: Shader routines for decoding and transforming colors,
  tone mapping, dithering, and so forth.
- `shaders/sampling.h`: Shader routines for various algorithms that sample from
  images, such as debanding and scaling.

### Tier 3 (shader dispatch)

- `dispatch.h`: A higher-level interface to the `pl_shader` system, based on
  `gpu.h`. This dispatch mechanism generates and executes complete GLSL
  shaders, subject to the constraints and limitations of the underlying GPU.

This shader dispatch mechanism is designed to be combined with the shader
processing routines exported by `shaders/*.h`, and takes care of the low-level
translation of the resulting `pl_shader_res` objects into legal GLSL. It also
takes care of resource binding, shader input placement, as well as shader
caching and resource pooling, and makes sure all generated shaders have unique
identifiers (so they can be freely merged together).

### Tier 4 (high level renderer)

- `renderer.h`: A high-level renderer which combines the shader primitives and
  dispatch mechanism into a fully-fledged rendering pipeline that takes raw
  texture data and transforms it into the desired output image.
- `utils/upload.h`: A high-level helper for uploading generic data in some
  user-described format to a plane texture suitable for use with `renderer.h`.
  These helpers essentially take care of picking/mapping a good image format
  supported by the GPU. (Note: Eventually, this function will also support
  on-CPU conversions to a different format where necessary, but for now, it
  will just fail.)

This is the "primary" interface to libplacebo, and the one most users will be
interested in. It takes care of internal details such as degrading to simpler
algorithms depending on the hardware's capabilities, combining the correct
sequence of colorspace transformations and shader passes in order to get the
best overall image quality, and so forth.

**WARNING**: `renderer.h` is a WIP component and does not yet support the full
advertised feature set. If you trigger any unimplemented paths, it will
abort(). Use at your own risk.

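To give a rough idea of how these tiers fit together, here is a minimal sketch
of driving the tier-4 interface, condensed from the `demos/sdl2.c` program
shipped further down in this archive. The helper's name and signature are made
up for illustration, all error checking is omitted, and the renderer is assumed
to have been created once at startup (with `pl_renderer_create`) and reused
across frames, as the demo does:

```c
#include <libplacebo/renderer.h>
#include <libplacebo/utils/upload.h>
#include <libplacebo/vulkan.h>

// Hypothetical helper: upload one plane of raw pixels and render it to the
// current swapchain frame. Error handling omitted for brevity.
static void render_one_frame(const struct pl_vulkan *vk,
                             struct pl_renderer *renderer,
                             const struct pl_plane_data *data,
                             const struct pl_swapchain_frame *frame)
{
    // utils/upload.h: pick a suitable GPU format and upload the
    // user-described pixel data as a single plane texture
    struct pl_plane plane;
    pl_upload_plane(vk->gpu, &plane, data);

    // Describe the source image in terms of that plane
    struct pl_image image = {
        .num_planes = 1,
        .planes     = { plane },
        .repr       = pl_color_repr_unknown,
        .color      = pl_color_space_unknown,
        .width      = plane.texture->params.w,
        .height     = plane.texture->params.h,
    };

    // Wrap the swapchain frame (tier 1) as a render target and let the
    // renderer pick, generate and dispatch the required shaders (tiers 2-4)
    struct pl_render_target target;
    pl_render_target_from_swapchain(&target, frame);

    struct pl_render_params params = pl_render_default_params;
    pl_render_image(renderer, &image, &target, &params);
}
```

In a real program you would also check the return values of `pl_upload_plane`
and `pl_render_image`, and upload static content once instead of re-uploading
it every frame; see the complete `demos/sdl2.c` source below.
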
## Installing

### Gentoo

An [ebuild](etc/libplacebo-9999.ebuild) is available.

### Building from source

libplacebo is built using the [meson build system](http://mesonbuild.com/).
You can build the project using the following steps:

```bash
$ DIR=./build
$ meson $DIR
$ ninja -C$DIR
```

To rebuild the project on changes, re-run `ninja -Cbuild`. If you wish to
install the build products to the configured prefix (typically `/usr/local/`),
you can run `ninja -Cbuild install`. Note that this is normally ill-advised
except for developers who know what they're doing. Regular users should rely
on distro packages.

### Configuring

To get a list of configuration options supported by libplacebo, after running
`meson $DIR` you can run `meson configure $DIR`, e.g.:

```bash
$ meson $DIR
$ meson configure $DIR
```

If you want to disable a component, for example Vulkan support, you can
explicitly set it to `false`, i.e.:

```bash
$ meson configure $DIR -Dvulkan=false -Dshaderc=false
$ ninja -C$DIR
```

### Testing

To enable building and executing the tests, you need to build with `tests`
enabled, i.e.:

```bash
$ meson configure $DIR -Dtests=true
$ ninja -C$DIR test
```

### Benchmarking

A naive benchmark suite is provided as an extra test case, disabled by default
(due to the high execution time required). To enable it, use the `bench`
option:

```bash
$ meson configure $DIR -Dbench=true
$ meson test -C$DIR benchmark --verbose
```

## Using

Building a trivial project using libplacebo is straightforward:

```c
// build with -lplacebo
#include <libplacebo/context.h>

int main() {
    struct pl_context *ctx;
    ctx = pl_context_create(PL_API_VER, &(struct pl_context_params) {
        .log_cb = pl_log_color,
        .log_level = PL_LOG_INFO,
    });

    // do something..

    pl_context_destroy(&ctx);
}
```

For full documentation of the API, refer to the above
[API Overview](#api-overview) as well as the
[public header files](src/include/libplacebo). You can find additional
examples of how to use the various components in the [unit tests](src/tests),
as well as the [demo programs](demos).

I will create more and expanded tutorials/examples once libplacebo is a bit
more feature-complete.

## Support

If you like what I am doing with libplacebo, and would like to help see this
project grow beyond its initial scope, feel free to
[support me on Patreon](https://www.patreon.com/haasn).
libplacebo-0.4.0/compile000077500000000000000000000000751324021332500151620ustar00rootroot00000000000000
#!/bin/sh
DIR=./build
[ -d $DIR ] || meson $DIR
ninja -C$DIR
libplacebo-0.4.0/cross_mingw_i686.txt000066400000000000000000000005021324021332500174460ustar00rootroot00000000000000
[binaries]
c = 'i686-w64-mingw32-gcc'
cpp = 'i686-w64-mingw32-g++'
ar = 'i686-w64-mingw32-ar'
strip = 'i686-w64-mingw32-strip'
pkgconfig = 'i686-w64-mingw32-pkg-config'
exe_wrapper = 'wine' # A command used to run generated executables.

[host_machine]
system = 'windows'
cpu_family = 'x86'
cpu = 'i686'
endian = 'little'
libplacebo-0.4.0/cross_mingw_x86_64.txt000066400000000000000000000005201324021332500177100ustar00rootroot00000000000000
[binaries]
c = 'x86_64-w64-mingw32-gcc'
cpp = 'x86_64-w64-mingw32-g++'
ar = 'x86_64-w64-mingw32-ar'
strip = 'x86_64-w64-mingw32-strip'
pkgconfig = 'x86_64-w64-mingw32-pkg-config'
exe_wrapper = 'wine64' # A command used to run generated executables.

[host_machine] system = 'windows' cpu_family = 'x86' cpu = 'x86_64' endian = 'little' libplacebo-0.4.0/demos/000077500000000000000000000000001324021332500147115ustar00rootroot00000000000000libplacebo-0.4.0/demos/lena.jpg000066400000000000000000001514261324021332500163430ustar00rootroot00000000000000JFIFddC     C   "  _ !1AQ"2aq #BR3br$%456CtScs&DUVWd7ETXe9!1AQ"aq#24B$3Rbr ?)JJRR()JJRR()JJRR(+֜G\7Ľ[,Z,~pfPR8}°pOJ?Y?tyu]YݻP]%-ϵKnK I @]JP R)@)JP R)@)JP R)@)JP R)@)JP R)@)JP R)@)JP RZuf]7qe2^@I \]{,5dH]Hjls,#~C#x!@t]gٺkbş.3dqݴoVP\;$a.##5Fj/XG}%O|0j*W*;6K-iuD`yU:{}+\NBt#)Iʰ]f.>[CI:9W<:ӧ/>e}]R:y(Wm 1_5yCIR!gk ܩYn3PXK5J[CҔ2Kn JRR()JJRR()JJRR()JJRR()JJRR()JJRR(aDQrH%,⶜q$ NНWf /RjL-µ6}2 9W5'*rp<Oo-[L{UAhVֲS, ''jRi#|x-!&JYaE%KUԁŽwCڛ|֢n'qSPA\)݇Un zJP,pwOIuyVIJ!!BU/m-5]߹=/NSeR]k)JU)~)JP R~tU??=j,һn@C%E h0T)qpv,l@XPɤ |ځ,ye<(\ez+(NP{xfjGvk(MSحۼ'M1U%]o $,g>y8 +٢^u3fE{6TEY+gAyl[c]HFGi9 lmDbn܈K@ 'uU8,TY)ui=ߡ/흲ݜnΒ]吕tSA+ddnp _4'BuURڕ8!nhƔq)>7 jӏHeJPdqEKQ%TiBRTo;ZtRV:zK#ÁN Zu;WݞJ5uFVkNv.ls-N2c{ZsUYX-\~( ݓKX< nqCqO{z4r둊Wp3RfoJRR()JJRR()JJRR()JJRR()JJRR()JPjԚ_ W {5&*`޾ TRq+iuDEX.Q8CH+ZR桒Mp?R׽軅앩a;4 s)Ԡ+J׍RmZT'.˼I|!*Z([+x_[nzT?>bR nPNB G*UvѶVtoCrK*!) U/h%E PN`똼u?lT_AJRJRR'¿H r|ף-8eVRns1>>?GOXR1X//m??5%X8il(}#`yIm%0 `}PHBBRR~Ҕ9RoZe*Z|H *3| ԗCPA ,­IO9v&j7UDy#k8ˤ.GA$ c!ꏎ3ϭ8jYњŞz/V攺Vms*;eD@ԌsI'#[5g-vk]8QޡS֗l+S[m8q) qzr}q66RV:jʎЭM֥M}F{u~?5{ȉɾ[[ND'}$R^rQs j-/w}Nl9l'бPRpArb 5n]4TXWR d(3 +#$|-w{Pve9( AsFEtT.),b֥5? TCٓ֓EÈNHiJQs۟Kk䒤ˀӘ K$)JP R)@)JP R)@)JP R)@)JP R)@)JP ݠحW9)庖ekZ HI z'zV泐㳞 HM/[D+ɥ@hmZc ͊R}1̩ UJ lB+R p<#2 j+[s崤=pK-2;Q% '$%d.i^z rlq`箂IT}{h]6{Οf٤y*E)JJRR!ԓ@| )9MňuÀ?d]e5#Eǜ<_!@\gZGed\=gqOY5)@)Jq2qiChJR& ۘ/Iy,dԟpj.nd d8I)oO%/G׼#R=%C'#ܐ'ܮ*1rl|X&P$JZ= 意U@5r)@)Jxnq ^6xJA8‬ӜoNص ڻWvJތ* l% NP^ܑy-?c~$z|ӰyA})[9'螾Vt*rSSXMe3x vqkTETPXmC@ֻKݲ%$Gҷ!֐'AGIWuvx AE%M mA+JNpG-NKޥt9uӰI^)1=T[ݒ{*zjh.h;z:3JR)JJRR()JJRR()JJRR()JJRR(pVշ]KQZ_qǰ·%8[\tIP'\|0<+u^nK!72ۼG;F3c@¼o ,)ɤi³ĐȖȬٔjlHC-\X@QG^p}P)JR~@' =^QP?iGYU#k 5z}Z,~#F3n+Y&B&C)HP>=}(~-ii Z(jxD7vZTLfDJ} y]r!t#>+#U<r--g;y×?k¾iR$LZ3?| $E#OO}\Ґ : ()V1mɓd{9T#.l~iVp 1BOy's5\N"EYKa>JW"{vm |-jrB'Q lmJfRYܵRϚy<ɪP U4KbHms(ǂGUpTJܦ0R}Q}C`n#S@\EbtgJaQAml8ۈ# JA #害jŝmjpPPL$m_$p)p -RBĻy2@@y|wT G5]!!u۬qޕP@V?m.n”K HZXWn>J۶1/+c/m)roC+$ BNs6ceGY+8JRR()^3&GD~Tۍ)_yam R$ۮKm)j8 &:?zWQ?u5S1CyKK+BO#bEۃG}m n\qB6-g+ӉHØSRbi-(-YBСЂ9? WW)5KjCyP5J>Cݥ8HBP@#s֞=ll2,zԔd Ry&C+ niD^X'JRR()JJRR()JJRR\aܗKC[; %)1*`UWcήK|V ؐ܂V9yaQf x)~LlO%K15 2ii %PVlkL6h4G}TҾ]u 4Z[mKQHI4Z$\$e.-#)~jӹ-ځ\%Kj8R0Byrj e 4JP( BaEk9qըk8TO_rRR"CQY[8@ʖ(J><Hy-}e$:"l})dAU*4'#k/aNg{:]*ǻX e,<:2|O׽(+kKhRd* R-L r2%,ϡ9'>HFTKqA)jnnSv3=xsm=Wϒ}z7bi"zh.:ʺs8Co1ſ?1%;{=UԥYjCKiZÉ)qKȥIP!@DAkOƳ9[ŭ{2tW%9̂CX mǻGi^Y5ݞ;Q4-P]VE~`0YJp <9s;O;|%%%2a%VR7)՛`;a8[+r\ o?Alꏟ>ҿoIIp׶=Q֊R0)V=atmzAj6,ukqެN5\j`8QWpU֊P2:qJmhr_,PZ3Rھk([r͊Λ$֔y!Fws./H>?Ʈ?BFiLp#[)Pܮȷ4&[=ec$HOyP2 i'h2IknQ.⒔6r3#ׂBܟy5Ρ$M&pIqWJJRR})לKMeKZ{ɫFWIIv5]r69$g} T?vS8Ž-(N1xZ3fsa'~{ 74ii %{} U$[gt$;n6RT$Oҩ=6<"&GvpʾMG€HFTs2j`&Rw LWGWM="B>X;?ODinƩJ]ՃXI5vR)@)J5,޵T!/xh#!O+q%-R6JKr SMjС .!IRB2Y-qs|{*s>6Ǧ -7)A' Cܕr hW#w7b}yݷi4(:JKn;+f֢AQ?qnta ؆a%)4ԠVm$ܲz+Zoב^\~.&d8OV欻q q!c!@xΜ9FU!(>hﱗGOqUYSZb qtˌˏbkd<馛hkl=RA` ¿SMep'SNӑ>۪浟y*mhXl`ϙ.)Vdn)37]#rc''suWQsK)V6pvy ;}&{d+#p{@nBIQ[QI (xMź2m$s'+\`FrC w)G[cDr5l#ֱ^Cyhw nb y)xwAsWUx6ʷ=)2788b֖AHRfFd!FJgzހinܦT }Vf)JJRR()Pj.԰xdvez4s)X6D4ԓ)VB7%I;iRiDZhA|ڧ4^Zgm0ͥƠ6>I)*m *R6Q9ztMf\^ӉpIt#bqd`|3B:vn5LLUn,*Do,W*Y6IB=mzJ eǵڡ426ԓ̕TJI$!cOPR=W_sNZ eIe$xNJrTTI$q*'t(EaiN2-͠oWg'>ftoVQXHRE)JJRRC}{Md2ZjZ+*T}`@yFJI _D㚓9UaF.sxFvlzW}:ڴzC'1㨬TJUNI&:?c_;b.땅f@6qߊiJIa6)Jk()JJRR()JJRR(8}8զ5e Ʀ2I'! 
ӌ$ҏMu$mawe- RAqǣkso=Se^B9NmWْߪ"m<6> P> xQ~˩|# GvOUoPG }=ۍީ~RahJZ@IXJ+c21R6JBIG`Q\q딫jH9U-YgȡyG;}` zB.%ۂBHmI;pz(YVv㝃cK]MV/mq^o㤯$4<}XU^}s /us5KQ4= =Fղje:e >BRakT-,!#燰4vVȹ[9H}` +%)JF}JVH)@)JP R)@)JP R)@)JPk~ͼApuا ӉAv[*KܬM@/s w9u"4+VKoӄg O4sCVD&[*S0B>JfWc^$Ա׆4rt;IQ:y5kp[B3 IP`rAm)KsB)j)#r rqi[-6NN 2|Gղ4d˨y2WMC`Y~ԜRYigBwINMaҲn7 +n9Tv#^{<"p ){gqB77OLǵDHiԬrCAY%Dծ/Wcc%%*{PNg [.YZɚ'NmµK{yIHʕi6mi)ro>6$Ub_x} =;bȠ VQ9RW-'<EJW'&bbBxd)JP RA}[tM:=;ޕ)3d$ ՞+}Vؚ)mZW*!%'bueiRHkjE+z"ֹnr_lu 4Z[mKYHI>}FTeXlw.3&7cj X BJuaT٠1y%aiZ(*Vŭ)IsEFogDe$/-dt$c wGf:ysXiD&GU"ob_mĭ05nv 3 W$d OƉi7CM[^SpW;  }5,Q4)i$,ZIRp2y|e5rKmm%l: ˦,c!QBR=M^iJR()JJRR()JJRR()JJRW^O}\ժ tyl>㴈βg)KLzcHW< }.NFiq^Y'#Rt.X5>F4P' Xϝo%z~-Ff#yF?3ʕ>NWsO(Nք'Rdp)-t!^ϥ(~Qz09qgBTZZ d~c&@<yJ(>2+€%q%2l˃ɺo1XAV/ۮVCD)R:aFzxlǵ7v[qoÎ:WWt -_CۛNݵ-++y pHAg@gK&ZBKXBCHRqnJ+P5=yy*PHg?uVh|,'ξSح[BkRyutl]>FF7x45zmP*\i97U#'ƩfaLZ}FXˠ棻fQL4myK?>1)U\I=ҖJSsx-{p±d_c p;VdJRPI$5M膄+I883^ e ހyb띨[;Ks|<1X:ת?EM 'iO/.k($ $ʾKiY{_M2;ĥ8m)k)W(PZv߸a'${2zd׹"E2ސVޒJڞdmQD@'$swIk9~t2%H2}$Ǚj}S!&@s@yOBks{pcj|ɑظeɎy|HH6ӹViI.,mmwUS/uN5MƔpJ?b9Xdja|{ {ҭ:"4H.-'q* )- Ĭ]Cy>߅VZ?/+j3$锞 Ԫ7{KUƽcgpN|7dҶ~=Hva2⣒zWxua xhGUSRYE$)JR()JJRR()JJRR()JJRRݿtK@q[[-&\q%@`x1A*zM[q2v ʊCe*##΀Gٳ]U4@P Y#,-M9gA-zgk0/%"iܻTaHSxǤwx$é9H tp6Jq HY opH>Os;NU>c57tRg})a J底c9cdrZm(BB`:*m)duDx__qGe6FVJzǕTg DeNi<Է&][sfPGthKX: 5>-DY.wC9@w%{dmIǼs4/ovRHN_yg']c8L1*m*frlOUm}6)~W#ïִK[yj;.c}[>= Gh#.Y*>\V\$Xr(ʷ9=R47J~sm+uONe`d[\ymN2 i_pV_0㘭%E%矞|j٭ ꎘmZ@*ώz]LJmbGVV2nZ‖'l|6XmtlLaJzB[cMq%<#zVhKHt4,n#L("5FƚQlG\SNR$Z.- $ ǚbu$kGRqp3uqX~MoNjِF\L%nR¶c%<cquR-qѽ9 ^5;iν5 [Q-MtD}m{}Ugg9Oqﯼ/4A*P:4L(꣭N{r=6ee}GQ*Rɬ?K)J WRVMjiNܖ0s2^R$8#k>hNʎhN`6O]㖐suѶqm,TC~ |/jSMY-ٷR,Ǖ$ T?ZR[R &Qk\łcf‹I@@O v'b\M/N$:1[K 脃!@*7ѴY/_qZ}|M6VI2xqi1tխҥ8xJpKeHPY#_2nwvHyb eڋh=9>u<}׋z1w]E$ Iv,ʒrjBzuIL%dw(uW-3P5(sF־vþ(Ͷ%V̴N> eC:eahq xQ Kr5#^-yAɶx&5Է">gU:['ȥݍUωRH)@)JP R)@)JP R)@)JP R)@)JPwg8QKHk}KvcLe!k Ja x楞wZ#Rҝ !edQh[:)R :t{d'y0㮸p$ Mk{EhxVaoUz"" 1$m{\cµߴwi|T=2hT3<')}C!M{)m g Qqjd+olmzʃq9ϖ<ɷ{VMv\ޤ ՐOvmNZ_Z%_L{"$Hm QGyբec[ڃK܌z-m}GM($~KJ65soͳVu'.%&j8MJr@'M&2d-EÎbqLJƳ9k|G/X_"Ҳb[hY?LTvT[m:X ʱ[,^߆l<\:ũ} մ-,i礸d$AQ82@[4I~WCnI> OΚv{'JIuƿ,تޥLaYӽ1Kɐt )NS~h.1%;ebğ[o* O2Iyt{t'V[-h[%@ )TAl֙K-칏%2IyT.FR;S=ˑqy*G\aI)#zg5|kAvU0JUz }y e ꤊ;=)OiQKq. N[9ZAW?hr7W6~Bi[VqEcOÕi\{9S'9ե!n(6*N㱽IcQq;Ѥ)dOBC'q87ݟbGV4 ;]$aD,lȥ'$U>UQXER*߬0c[!G;Q!m,6J$JI(j >kc-愰ç{X%O3[YQ0]/R mZriaQ[dq$PҬYֺuv+hI$ Wy2O-d(鼤MG}n~қJ5wZHO&.O¸s(҃Bj`CJӇSmZw[x,7#|믝望uZ-ςIYqUmJJ]K' ?,]]S-.)nsVSc~L۪Rhs”)@)JP R)@)JP R)@)JP Ra8҆2(kMMvꋥD=mE7,2|C?AStҮ0\od})CrRd\c=m1p`ǵۚQmQ% n=Iv7w ϫyha:W\5]|ۄ˚?%µ#$<&͵!mq(@ɉɨ[>mP|IJ 6+=Qϟξ%^A@wFT[ !>r=4]iy6(v$q\^N0msZh [RF1,U{p .vUy!Wޖڸތw`]Y=`ἶ)T?krJj 侠62I&/n zg2ap;ԙ*<4҆n!.dr֥JkˏBmm/{&]#%.\:N$#r Í$*I?ZqދS8=&scVqncA~3=ΡM)2!'j Eh.̓zXbG i֑}VU. .>>m_kf?oRb8r dROuj8ʷ,wӗ¹]$ϖOt쩗"›. mpN'ݛ N(vݔr7Z)@)JP R)@)JP R)@)JP RK}ƻ0iԡ!.q)Qi9dʷ}IvU)4jHdĔ=QW;(穪eG+x}tkMK-lid #r֮V6hOur6cJnyYWښ89s᪮-˹.倂 qAyuN:}9!Nт=`|0+ӻZs܍۱I n +E_EfJg: K$~PrR:tjɐ.j|{R bHjQSםKk'Nc}**&%NUdt]fm}M2Z5j-R| w%X [!$J>>%: q(C9ծV2|H|4TL(Ҭ ~vqTçw%=ܯF0?09OK8CR@Uf\pB YI%YQ(jEku:JvQ妟*ܤ(m9!=3[Tq-D̈y $% )g7qΫajtV287$ׅE+ ^p=-\L1dg\e)+ NU ;[I kR{K9\I6/ocmq֝i9iӰB Rm@H$Vvn%;PZ4]ClTY!L on,3{](!+*ؒ[Jmߏ'U.ѓg__Cw0и4rːV[R- ڄ)T3~ՖhwDk}͉1wIWWiL9op $Bl-=]q .YM8zGwMN?m/3¥0|; EcpcA5!X < 3ιsFuNp- ;9aDžKMbcc/?z?7KNLVy) JsQ^}ơwC>MrDSrߛ}rly%1K -P IyCΪY%[4|[*acT]yO-AԨ8^`2ÌW ~j Er-M3-r.JSeuݫ];Q԰N^zzp^DV.9,RlVYz=1Q#%:) ޵drIɪk nB&3@FYIXcx @;zjUŨo]0!HRӡ^T89A ֮vE_bvD>>Q% Ba*9Ԓe%)Kn꿴WkuvCysO*Aڔ!oVV[ChܝT-v=\M|3ԛ.H9fQ&v9sz㼕w @Y?![ER#\[NO> mgr}ըV(ɽ6bRoSl#-^>O_!YC6?o@}zxN=hyouG`)JU R)@)JP R)@)JP R)@~ 's$և\./[^]7-~/wmm3sRSxe>K;Į#O] sb ]7IVzGz_$sD*WobC a*lfv.s%n+Ñ2]\bZEEM!~:4Gv#ygjPURcnTnmPa##˯S;&%"NZ"3Oq_U !]$! 
c|\mmNMN);r HTEbG0GJQw!9xQM0G,PgBpzSLDqlGeE' u xf Z񛈅M i?UvT3.:M#tJN AC,.vwaZOg'늦tgٹ:۵Z[@cki$+tʊRr)qoKpoN}nh˝"BK%m69p +F$(Pw|1r:aad~2D7)@Em)R- vq/ɜϘ* #$Wbu_m-Hv;Z%)ǐO?_J^|z-Ѥ5EȍrS61=G !Dp\=]Prow1.$˛ꃔ8B0y9w2(һjo=-xzORzZڤJr)63#(s6(PqVӏ*XFY1dӲVjRy簧 .9Q>=ϩ=.b5=vrԂȩo(G \x.T+ι5"8+f(hV ut)YΕURmi9kehINniaIJRb^e%!}Y>X=ث?l5zx!b R/2ZPJdJOxچUCP` ]%$Ą|DK'hUi Q5v6`fTnn8ݰ-3*s:]yrOǁҜA:OǸj4YMwsԖ<a Q%إ;oĥ} ܖTX$< Qؗjsi:>T%ը@IuNxYq~V$EnS&I9 B[u So$(Z7 Y OTbeNqUÇ^|[΁vcE5IkH˳]9Tu'Aʜi |v aXGjBmr־B=cnHsR/u{w ^eOӲ,8ܔ )R v*IVtӥ<إ1hw먑uu X QJB (HN}ZtiTu%%:jP91,^Xb. R1PV4y2+K_O0'uݢu#].cʚV8٭uANV!]p 9@X,Y}:/? bܤ]Wٮr_k =nT5o1׻@ ϼx`՟KQC$g@ }sNu:_ D1Q!VPQ#q'U>߻^Mw+zҔ()JJRR()JJRR()JJUTZjw|HVO]'ͥZӥ ʉɲBM\Is(q]6mA O [HR W1V'ތd2xrUI-yeuT4em+?5+[YLgߒ]]2SQmJN97J7 j3aʺd5&,1ŹnA{fHX$}wۋ82|  ݽ))X`<޵r:j{~Xe[})+JBycsfz~:۷<n4GE%%C>\Uǜ1KIF@=[>ϕgC!)J@s`:ہMQ~o9Ra@('98Ƣ֩V7:q} ]IQ)B֙QCv3がfghamH} IhYeȏnXtkyKI<[J2zr U`;['F[ rC<gʾ{yiI\S4^;u_]vn5xnW/HqRR[)u,1̑ζ/-|% ˺*bms8N}`wIJ#I|EoBNj,(nj9*KOv`(tQRHݢ.v㶗RB+D$CH݁T(sRW]Ϭ]Tte>‡򗯏BIhn+⓯d6fMQ#$'Gz*Dzތѧ ==r2BnW\ߘg p !gxunZqrT96AH>3+p *-Z4n*) j;#ZO]- \"Y۽ q$pRrG=+i~ͶOdH5Ɛ$9# ʪY)5>L.@CJT@)y je,vXK 2HB0I$WyxB`z)JP-vZyMl]sӅ2DJK8PU~Oʪ/KrSCrIݟvk഼u}6 #Ҡ Л =h";EiwqPAXprZGQyu5-p܄FIJ#}{QPFgnI82IU~&c,-2!D{\+{k SmGzN5%/d4*8*s-Y/bFޔQ$% H'hКN֎c#AFcJΤ _}|k>tXY̋WIm݀(֯}$mtì⟽iW9H^0ۤ ǁK W?8B&N63'b8ܦۏ 'F`q&Du:m~%ˍ )Ke%`8VG?*ͪ%|:ާ} n((]`uOUhܖ$Х)^R)@)JP R)@)JP R lFpJơ j[:Rg )?]I'ur\;@xhؖU3p ([d6dx]oifS{d~ CR=qJ3| jWөf;qԯa`)>F-J@ vv:P]NRS3^>$.0nZ\)Kv> kwuROwj=3=g_K~Hi$DrYh{JQ :X^l 亷98g!)1eӳ} iV!?Y]CG$?!^ sKxڀxr8&:A:#Y^_:y)JLv8 ^*ܸ ɐh 9Aw1VH '%k Y~Dս7 קA[a0b[ihJ1j/kS_ABp<pC+DiJMQm֡B).(FE|[{^SU8 +O6]R8~?BY=8 ˧]!6⠺ZR\JR2H*"] bA[I^ܻܵ%;L%C9m#2V[9! 5Jn2mW'7TV[xu3lUiEsOqx3݂iðP4B(q<; ݟE|J{<1x:jp('إ)^ R)@)JP R)@)JP Z#RSeN)RH#j"M{v E{ZR-w%@Q$k}`; TZlkNsVż:Z[>k)]ַRˆ<:W15 4l;ֽPʠA9Wk^WQiu|?՗x׸v\h>ym7{LvL{x n$Xn]i#|k$Xs<1fu 8?_ _ K͡wiJ3$ǝ^t{%苀* R䂬'KHMmM҄t<~t;Ew{PB8Fp?ARD:[d? ws9-ԱJҴ;Oc[9H^1R1# :Sqg0Z?v:Z z8(HJZh#+oU=K˯JxeI'ծ mwLҔD;J树?Z.l;}Yf*Ṷ輖ydž{ _8kހKӖZ2dCd X7::]!NLs_NHiH$ mBGhrLV=b $U":{Ӎ>\T<-y*9zt(qՂy~fb/q_V 6\w{꘻VPKp'Ȑ?OҡvJul?MH\u } Ґ\v~y9u_b FuO:< OP2eO#㞧T DJटD{q.A0<*{Ug)^ R)@)JP R)@)JP xǮa[:W[4E79ݪw $ddϫCZu3-V[䔵m!]JuD$//,0F zs=ȾO# %Ǯ ܽQrڸ\ӄ1ݥxen7tqn$nWL뇚GkN OgOƜʤNwK$ $$`4V+5Ɣ m^RrR #9  ܢD[KTD}STtʿ5hΜN8>{+71E. Z %5Y \GPa@ּז9 +1VpƏ=Jvn2j=HmҴ$/J,' ؟CX%> #on#H8*9隿pK>PۍnQN/X/:4]]F& #'Ӷ%еg)Zuc8$'?#R}8v`#oVAQ-2۩I U`rvEQk(UL%I(pc9㳘 rٱ{֖v\h,=}My.)ca!NaJ–GSʡע"iS%B֗mC͚S|dCjm@%))KPVvhpSOE=Syu/(c)#=mNrQn6w;T aY.q+H8O^u}f} ;n%@$JU븵*>\]ykMڧuc^NY*dE熚FVi)Ķ8d (@))PRj}eYw7Z"#= bFjRFIK#NIlj5WͯtݵwoiwOƥN"å%FFK6s1.{ړ4v YZxB)@=݊n]cRQ ,>8V>*qoNk<֜_3qVmuUL[r*JJI>wί5&-I&mn)YR)@FÅfkuRVI' >N|qq/K Ύ^9?W*rB$is4 \!J7:|?_ yic뗞Ӝ>$3ʦJW9NMs,hc\B,3%KqK QǮRBn{+Uxt֔76 YsFW-‚P<𔁟* [sc҃q=q!T|V8>/>t=G?j7T.- lU5w5'|47rQ5NI: t9r=*HG,{J$u?)V~9# ׯ -69?.'a%U~NxsdQV8- ݢp"w\N}쓥qzS Ϗ[7ZGO^XImKRb_*'j\ar ʹ9jbdj()JJRR()JJRR(!%J!)$VCkOA9SoruD[$H=W*U[RY%rS_e=Iه7ɹ阯%Wqs%ݺȜduevre U^Ov`M]uO]QgTv[Q,sH_{Z4%ϩU$ *3Vڒڎ |<AuV)`Ai*z)XHk(t_CMms)'OȪmg(l̤! `+}Ds6dF˲%NƐ:~zO*^PZNoE0|oa [z>F62;׈$`%G,g暗97bJTPS@$xݻ>u犓;D&D0-$m m}R H}czP`\֥OO¯)?[weW!̩mNnLC{ K*P'>'ÕcZz)#;RW !iUGկM%H+Z?q5IvpاS_(TPWAKrjۭKQ %N;ԃd|+uM# φ%V&*C$(%;,εWNT!GIԛovt#sv2RfGxy)R@pP~> T;ڵ [ {|pޤB5wj7RQJRtoiutŸDV#ːO^ԀeОмp[Eh˙0&%DaI#(> r9J-pM6rf)Bn%% (sH)<URqqSB-IǼ'ϣKTN9J]!^3:-&+q$I 9xUUktdХ+P\4.sf;onVI|gr|x+IEfOB[IIe8qUhY7T]!_tdjScB˜eŷ#1zSca*>앭P(xJ8z( #Qޑls.+Gy#)o޲prO!Z~#7,=GuHqXl꽔s;G<%>u7JUpT,vpyI3 :υyp9ĪmՂtHQIJAbqWD? 
?LM`-Y:xIW>#Xn# cޯ'?zX8^Q_ yG_c8~kIP d2ꇟ"޺P0RA_|&O ܦ[$'F?LVRkK*ݯNwYڂT(% xʳaﮎ4>[ӌy83bl# R)@)JP R)@+_m%2Pej-MP$yOj+!JD̉: ⾐@ZJI) zme{NOⳟzkk(EڳZnឮkR7٦Whm/b=!I  8 nou: |a]fSS$s>yύ}sW1!_q"9iAz2r\svgog볚TIo *BД m l+q9Wwi=NVUMプ5ׂ"> g!_*+JJ*oz }.@(`R?S_CȓXֿ5*%Jz6 dRB>xC<=ƱyĊc?""8ʒq6H|2>[8[FJ}`TÈݵm)i#>Rr~5Q'8 GLujxdxjLpCo+jsӗCʶoS1q[܂ 7>n*8R^k~\R|[el.|&}BU ӆV[Z۱G>s)B*Yhkh*,h> L6[qiw o -R$)ˌ xVqzqoiE5W_B-ŨPjHܧVyt s+UӡcO~צ52jn~%ō{Q WJTY)* =ŖdGZB@HլgjmTqH )%r0=BR|ki+󧴛b{V HC&P+}D3fdXmP?ܳ2JWAÍRPJFvﵷl/"8};e[]o\RAJ؎ѓ)ğH=%cvBj&T)|)$[ooX^ Ws~}Og@"][!siڳYC@5q*iҤ wa>k^{}Rd+h\I]1$l4=\tQ9?OҤ8V6t'h֜B[AS/1?Xj w :uOZZmx ~%X ]?IYZ^$R<8)R Zϙʓ*7!o*^ҜH 'dSJ޴-IՎHsqyrK,ӧ㧯{ӲecS-^# WO2xzdDy/, LyUBR~`v4kD#D)Ohl`=N\kO;[%`8y'/bMZq68ʵ$} ڕ֗ Jr#_U`W R|[7bM؎G\m䤨GֹmtbFD NRr S :ViXu6ǟieUH?[Jl'r5ڴ){2ҝ I?G\$VwWmC.Ϡ6Q'(H8,`bE[Uw([YZS˟2GO aJ8K 8{\߯bnAI5mY!N Č)呕 iQ+ }Wrkb8ɯ}J!x]1Vr-`(?5:) Nm[w jsUe8&N 4W"}?txp<yD @ Σ[p`JI+NG^8;(0H^O?gu_^Q`UזXo H9zi*yY_3OT}°MW\z9Ry42>'\NiP>4+C iԟ<uI˕jcs0$8٭֝{fMM\"06 +_u hzf> P%.-l}2+sMG5t R)JP R)@)JP R)@+ kEVP_iᤥaE@t$P+vͻ]e"EY䔊m ?3>Gp<?qpP匒#ZX\WkEi/Z~ۤo:B@8{7[I>k` G_fNP; 'm_nr5V4Jhۭ,bV- Ғ? Tv)K-%8 ?_lOõZ|zs|eyllNҒ"s H<>moSA'j7Rk[)yy8}Q Í/eGrxy0$ g޿[Q)lrBJzTX-I>C?皒#qb<$602b}e7]_ݺM BԐ66I$Vwnj<6RKeI$^@L+ڭQ$xN4en^έA-7zNz$@n6zV߀!IڴdZkRE<esӇQ:Y: QہbeЖP HgvD;N2\-VhBT PP R0\R[e .zJthT˓+r;,(Flܥw2HEvXQ,[mFh%))Ikvt*B0[3^ߞ pxapj|`˞TPAҲSH6-niZ!垪Q$5hUx]E.zVQ#*'塍4jZ< '֩.7WP-aN8# q#SF$d6Ԋkճw:QH*%-%^ u?~\3'ZO^xJ"ޠ36ɑ'<A UVJ)JJRT1@P.ө\8%(h)| )`su}9^i몽@TYx|@5R5b`˒]Zw=qUSjy,-i'Zqqݶ,ÎJ!-qV'5jx͉RS^S2'OԚsagw# QlnS,6Vh[] l$?JBBq0Nq(X?n*W|uuj9D?{IW+I~LXiܪZQIX8L!*].℺[:r3C÷Tާe)# miVN9c? CZ5&i]G,3$-w9l:S䎽SSĕwXSPɃ+WgMR=^:xV/I1YQf}u~GJOn1=~YM\^ӚEj%> KgÚƻiW^+qR()JJRR()JTJER&XC)B@$ 'SJh !|w.ewJU(*J,=FpyxO߭ˏa%[vNyWVOp)@uep.#oIu¡Ÿe=!\ľ $] ))=Z':uOפh N37;Eǣw-J@T{=Dz}?}EXXR-9:ꤔH h5q9qi@^2N}kSiZ\\aq--I.8[”zJ>kJBGGV2&)![6H?\uMFs(<{3B q`x {wFK-ik5 ]u!(EH|;⻺l&s&96[wK7M=,܇q-\y,4Ӊ< Ryx3IMIay3$:ه]7 =uHu@waD\oV9\`irO'P$T"ŋOLD[[? !JrAR|$r}(0ɖ|*^oJu*S}R9#a7{R{Suyekk&1\8iRLg5S:A\RAB\W@֧+/܋Dk"{CWh'[_+Ґ#I =𭿬k-B^)L)6)PJFNIea.O-^P#J 0+PylU<A7dc9p?­Hr69[V>]u<2?*RO~~5T>i7$ORlơuLqsMMƶV0B4gUC*ސ|Q-/?/)[*廝1O/*_D/r$p?yRˌo8G̐{$rh[KR C~+RNQJu3ܐ PsH>⻿Wu9k)[)@)JP R)@)JP ]bXS.SDX\pBJOk_4hl\T5*P[pHX#mJKyT;i^\Br1b٤UK8# flǚ`0X硅 ˡ%VAZ+y㋚QS-hRZl򑀔d 0Gzïj0E@>SP)#$3Xx:OMnd rWxԨ.(x(V2\PO'W;ZL#VsCW) QRA-a|AH~X}xYs-]dmTw6ݒV99bx%)$^͵>o8᎜5'R[YWk8Jٔ:c'%?,Um.өj^r,z `~^ DiڞJQ 1WF$dBJT2&b,KK`ZOBIc kq-{6IEjӮDEw- ;GϼO/!Q&b N>Hf%j9+m6&J$@dž1S(J5 q!U7mzNNd?0teŕ)BO@dE[8˱P_tҜi@<2~uP։}% aI*.z nCDfN+Ҡ] Y JBB@>{~F3ſʓ5ݕndYm5 y%g8a˩Ζ],:RZJ}@:νS-.j%d'c.YS&Zǣ k}ҔJRW˨Kn8cjGҾ@Z$ƃN"l{j>±f_m.G-I*RV<ZiZeFD. NaOfQPZ9X|: 86WCS*;hoWUM]u2%i)PIWw#".,{T d2e9[O9:*VѥLg?fzoSNlm'Ȍ3{gVGK]eǘׇi?Mok|J 9%?!XMw--ΕYpAUMлK;ጾ]͕ @+m~^OFxia+w-D5&apŻDhRƟ)* ;9zΆP}7aR|1kuˋ*YEA#6-ʊJ\iJAHQIU0TkW?dN . 
MAIUԒ8^w#զ3ʘ>7ɩ#i-S~?`J,p}AK\ꕎRQn% )L>$F7jd9ZZKCjחZS* Vܔz FGOO 5->)P:5U-9Vgy8>U|kѧˉm`#<N#_\%6$Fqm,+gT?yU նt6Zχ%ϚDe%dBO/w#PV]3dsV@0.2#֟vޣOV\P (=˝O.e'?Q_%[u>s[ˑ#5!2џAyx9G8StۊNJ/m9 W odUʓ̃>xqJA$p<}SZ]tf[U7@<жI'Jj*/ƙZmMĕAKZBK#[pAUApF+e8V/uzKEhC;ШCQ'&ꚹ五y?K4n,o*)Z?^--`L i)^r1C-~Y.j^v~֮JK^{N܊6*FA}F}jfief&bH,>0U V70X$<5;RgY!E c9zt5pbJxqڃLӊ}6)RSŅ(dj>Uv[&gQD,ˏ)P63$cA'>U]zNBcIPfPBz;>5%C*J8d++J$ ~a)@)JP R)@)JP=cYs ܊epSI A< (kgGyJ&ZҔ]܅$Hw$ںҤ}T);BvX 0.6HqDjr\<@chl|Qt|`HI>G'Zeb"ۭڎų!H**cn!!sRS$ҠΝ"_ kj^mݖxzkk%WTyx٬rSKKCiǁݻ$կC=E8+u>WfQ5 -{@ g S_-0e%(o(€0yyT4{ p|C|ƒ`ɒRTp>̣ݶpFyw:LGZ^Bڦ?:LQ)9`pM[^dHvtFҕ4rGZx l66pGhí^ʙ[nr[H@Qm<%K[BG4#O_R԰InT?ʥ9]#R$IYb#wZ@f pݒgTMO- ㌡8\Jc5\ " '(u'#8?ƤtT\'YI {Ԯ&O6uG f1EbvXY>{kd8 :V2Y1P JKrPK HnS(yXܕ$"\3/MvE7V JUn>*JdxP5 ʘ'=J~}~uYsڟh&v;9>h̵@ֶЭC^N}Ⱥ6,]JT2Xh @SP.3&:]rҶVQԄ[V銑R[_DaAaUy&%,Y߹댸<³.iKe{qm䢠}Dҟ[^ r1sK>\FN4@qꕤs-ƵgOmKkc9tlʊu VPGi#5)v[^!-ɐ2I= H:t۵y̆!Ҭ w\r8Y7)ˆo씩Dt_J[H [Q*8g>r'BMfpV58-Mȵ08} Q*GvM)Z)@)JP R)@*w 4~;'=2R@[JJxN֢O;y+^+m5ޖr]aqJqJh)I>@PA)_M~$RYmh>uEKDMha|Odu]¸%z>CjpBSV+E}T:)RB~"ޑlvU1Wk!Jϖ٫^d/Lx}-3ֲndƷ%BARN}jhF[XC^ |CJZۃ"IZ?_Swn_:?P}ˆu_qП _9|foGCe-eJON99+i`R]G1ٯPgSЫeN${N9@·+4zJySC&PYm' qʲ`?`ԝnm]x#$ 2:&4rY!YH4eurge#@QܓϞԤr睵tguΧ[v[P3T@$( B?bsSudL\~3v|\%֤7p2p]yІ@H4c?ܛEt/eГZ\_tzD"kA^)֟MG4nƊ[kXQhWTgsZx4vQjm{ oz'p(nD={Հ # gyWKx5śe`:qNMH|J ([k%I88ʀ?if tuCe\5]6 y kqJ{zqOL V^([h\)¶8F?(sppAMw٣whLYCti0y܊RKGΥSH,6m)y w{1%*PqD2o\_/3c-crm C6Lyxsuzknc31^5qk ZQaĥ9HmlS̨RIp>Hoyt]ePV$-/ HQ~UrNywlmqQ11o#ѻ_@4eרޅqaMLC*s+RbMb-F Qy=l:ڌ7Ll gV#_28~Bfu$\mmBր[ќt++`LjV.$p=+g͗ab 3.GXJuX!螢vJȻE`;)[}}w%)RSz'ڃ+RK>F[SvhSwC+VH|Xꟿ_ҲG5ukmFfRL[m0r2p u" XKekmK( JIrEKO+CIbƜB?Ŭc^KijbҤ-(ʒd~q'&dL|-bTuV0Ϙҿ`^ޅ1m]rOqQS3 ,!y%\\Ajy|>lJ'-:w.$qooNIy°S뜀k0ڏdU6R N|>D{S s? ƨ3q; RP@,\>gEur1Mj_8~P ('ĀQ;DuDnm5|┤dͶ ˆT[P)[KcRP •!:n~x_a:7}/m[,DWgSM.)ΰ3ٰL`ARAHHAJiC ?P~\Lrq!ԭBmJe%+HRM%I>h&hepx)6)g).1û}܁W(P,JHC }~=}̴>׼7pIlLO[,9@F.+ސtţ"JNs姞ygx: 5q% sz$ޣ !xHкBpط3JYw>nQhFzRjBR}6I՟Q6hgPId!+?z8e?U_3j}G@ܵɘ͵!2֊I~͒#+m6w{GqA4ۚK-D{j݂G%Z[r+hm pkJ,9-S봩2|2a Y>f\bŇĻ,ib0OƶNCž#c4#0M%  x*9 LvӚ7V*cvIjbmFvH#0GI,ZEZxqlZDZ"7+9KhH'ĜdI)@bVͳ6Ժ2u QNR}W+j"Y:.%kn٧]bKzm]@HlgD3ӊ>ybҡO΀r6x7L2q"_`m!Zx+=d]>FKcֵ~TO)JP R Tֱtc>dj;TU^Vr0W<ԑF:mn8R:kim] .;yfzMVJbƚyR{!>ۼ+pؖ./*-7M5di ,r%[%Ԇxk2S² ًDũ1K-*N6 Y7W+c8ΞuOB$JFG/d]8&;}o\t8I.Y)X;Hs+h 9ϿTPg\-(0xsjS֫t1jnszCᩒ%[JQȜSWlQ&w5Ď%>8ORLeÒ.Ym;O#ʽO>Aw{N3@557-crGLyd~`nսQ%Fmq'j8>Jt4+)`:Ԭ((xVr >΢@O[;Í,D무)K(m:֔XVMG I gcdL%F/)Ģ"2*tu]eK-d,duբkQS*D[_wҰ;'#Oĭ?폮G]u\s1@2/nI)rj_\Pg46ЯޣYW z=%$)y4`~#[8nS1֬!-ˢ!J;V_8Cd`tiϠ@YK~Jj\(mNgYFZjVR\Q$~TJ,sſ&2Oq=^/iV9Z5h-;rWjHYHS+)RT@$2pUWvZHKM! W_%I6s+fn[q5 ٤ԩ>a)O|c]5읡KзŹ32}'sO'ޕq##5鉓h=kj>j$*xΏ-!@"3<"}5٪wv "z:"{Jr}%>U9Mtgk5=Q)Jo[>j`I؟L^0jn#Pi79Kz̸a#dV _lMr^[ݩ`0YP]H˧_.vnϲ]m !G'qƳjΡ8RK-_q([;? -T]BA$2M;^pu =r-#*P(Tsѷ}i=\!Đ{>6 a1T}si7DŽQu7yڥi-G*hUg 4m*cc-eJ>P)JJRÙ<]ֲpF.lz S9v9k&5Μ-ċsE$:ZhKIWdGƺX- ք[IB!ZJI0 5uc,*圚N2yg1;TMní!kK/nFT4\t%-ZjQ+vSoMfn[{th]p[Fc'wy<:֦zeSBywk[2;]7&kXLJ~.s?  ccWR46Y'lޣX kf?1hWx7+'jSchdž*Ѽ3 &qH*O5˵NvH`~N>+.ݹ#~<]J{K]ym,rT>53'J%O)%[ܮOֽu5 <* >uNM1dDr:{$W԰y-XJ\ueED4÷ˑn# QJyXRN35pO˳Z0Vq#Y#'?psSi$D-)^IR[(cx❳!!kJBdN]8\\0a !Iဓӧr;slQ)j%ФM`ӎ2"iPD`+k_>ke=r 9R^%y9;8h`VHv9Q5rs9Q֎RhT`J e\gEoᬟ":HRHԑmMH\7X5H!aBJ*p(`''ae)t3QJ-BҰNRTI$`%3T+;*p$$Rn 3;!IW<"w5hόW4M._Zً JHNnd )Ԋ.3*¦ O{uZTeH؃"OpRߦ7oXJ KHaE[q.<,Jv8D]RVk)XBWǍ9w8-KswL`Ra2C%$ mW(v9$&j/vkOhKz{W=7 ^ÌW) ^j-/']#=J 6@LJ?>I*TbG'RR()JJRR()JJRxizS*%Hw$# *qZcǻ u WT02Tuq"$0jiĥE$C#kXxK6@ ) NN2z]ܜj/"z8PyjDh%>D>݌`Rㄷa1)<[!Z4MAyYh6v*>q=k8xÍ_1&>JOͫ*J'=* J⟉eQz+-n(@IW3^^)@b|INvŬ EkpQA+Bd#J'%[¸-Z=pL3(Q&KJ,hw]ֵe h>{䪫tWP5]8AE[e?ji1Ƽ#CiBIOFyFQq]NOcoTp'KNxc5x7qFAYE8ÆjۨFR\qd:tNg #$4z!>BˀZhw3 $=,d%ҹB cI Z%$uNպI꺕!ViA~CK(6ec̞u1qCZGJU6kkhl'/>m?kiZfNT2j?Y]ȑF ʕ=Mgh-[- P? 
?ZqXFQ= fv=mqm(Jm)G^#4],-hZVdgS/cȈN#n6 )'yMjxVihy% {#|$S%|Ì+ӊ@\蒛r:A? 0=GcEnJ<9O .khEoo;<|ɫͦMiT ?QYr{C2Uw#WO;q8FpFOJSD2o.+Ƹ5~nn=mw>*@V]3-*ݡQ:DY9)=B#+lM{P^})-5)h؉!;Na8J9U~y; V>\mosJ.Rq&ep2f_yG?}C׵%K O,S;|:朗JRCRH-m~=*/ҺfFp&8Q {DxdBʅH[gw<yVu%!]zCR\ai[ь$+kl5'R- ڐ6IO>0/!$Zu%m&E=1SSh4vb[BՆidmqܕ5vK`ۓ]0.,R0) BZȳC ;J+􃌤ٴd\2+4d!q'À&wMNG6[8 m U(Ҧ櫩6cڪEH8wHקw`=!'hH)ܤ"AFnuZ-$[-^Ԍ)p`>C -ώү ^.-sg=|@yK#y8W+H/^:,ޛ+OJL};EI*KAa;J@(,LzGK@Zӧmu+qJU GĜdM]Q\XfX+)JP R)@)JP R)@*0m)Jf됢TA y^&b0;BZ+[\;Aϐ'g;ԟ&[dzhV6уQo=2#=1[8Z,ƛKuGykN%sXf[RV}"$$1S5[XSc~uz RhU R)@)JP Rlibplacebo-0.4.0/demos/sdl2.c000066400000000000000000000201101324021332500157130ustar00rootroot00000000000000/* Compiling: * * gcc sdl2.c -o ./sdl2 -O2 \ * $(pkg-config --cflags --libs sdl2 SDL2_image vulkan libplacebo) * * Notes: * * - This proof-of-concept is extremely naive. It uses global state, and * ignores uninitialization on errors (just exit()s). This is probably not * what you should be doing for a real program, but I wanted to avoid the * example becoming too complicated. * * License: CC0 / Public Domain */ #include #include #include #include #include #include #include #include #include #include #define WINDOW_WIDTH 640 #define WINDOW_HEIGHT 480 SDL_Window *window; VkSurfaceKHR surf; struct pl_context *ctx; const struct pl_vulkan *vk; const struct pl_vk_inst *vk_inst; const struct pl_swapchain *swapchain; // for rendering struct pl_plane img_plane; struct pl_plane osd_plane; struct pl_renderer *renderer; static void uninit() { pl_renderer_destroy(&renderer); pl_tex_destroy(vk->gpu, &img_plane.texture); pl_tex_destroy(vk->gpu, &osd_plane.texture); pl_swapchain_destroy(&swapchain); pl_vulkan_destroy(&vk); vkDestroySurfaceKHR(vk_inst->instance, surf, NULL); pl_vk_inst_destroy(&vk_inst); pl_context_destroy(&ctx); SDL_DestroyWindow(window); SDL_Quit(); } static void init_sdl() { if (SDL_Init(SDL_INIT_VIDEO) < 0) { fprintf(stderr, "Failed to initialize SDL2: %s\n", SDL_GetError()); exit(1); } window = SDL_CreateWindow("libplacebo demo", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, WINDOW_WIDTH, WINDOW_HEIGHT, SDL_WINDOW_SHOWN | SDL_WINDOW_VULKAN); if (!window) { fprintf(stderr, "Failed creating window: %s\n", SDL_GetError()); exit(1); } } static void init_placebo() { ctx = pl_context_create(PL_API_VER, &(struct pl_context_params) { .log_cb = pl_log_color, .log_level = PL_LOG_DEBUG, }); assert(ctx); } static void init_vulkan() { struct pl_vk_inst_params iparams = pl_vk_inst_default_params; #ifndef NDEBUG iparams.debug = true; #endif unsigned int num = 0; if (!SDL_Vulkan_GetInstanceExtensions(window, &num, NULL)) { fprintf(stderr, "Failed enumerating vulkan extensions: %s\n", SDL_GetError()); exit(1); } iparams.extensions = malloc(num * sizeof(const char *)); iparams.num_extensions = num; assert(iparams.extensions); bool ok = SDL_Vulkan_GetInstanceExtensions(window, &num, iparams.extensions); assert(ok); if (num > 0) { printf("Requesting %d additional vulkan extensions:\n", num); for (int i = 0; i < num; i++) printf(" %s\n", iparams.extensions[i]); } vk_inst = pl_vk_inst_create(ctx, &iparams); if (!vk_inst) { fprintf(stderr, "Failed creating vulkan instance!"); exit(2); } free(iparams.extensions); if (!SDL_Vulkan_CreateSurface(window, vk_inst->instance, &surf)) { fprintf(stderr, "Failed creating vulkan surface: %s\n", SDL_GetError()); exit(1); } struct pl_vulkan_params params = pl_vulkan_default_params; params.instance = vk_inst->instance; params.surface = surf; params.allow_software = true; vk = pl_vulkan_create(ctx, ¶ms); if (!vk) { fprintf(stderr, 
"Failed creating vulkan device!"); exit(2); } swapchain = pl_vulkan_create_swapchain(vk, &(struct pl_vulkan_swapchain_params) { .surface = surf, .present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR, }); if (!swapchain) { fprintf(stderr, "Failed creating vulkan swapchain!"); exit(2); } } static bool upload_plane(const char *filename, struct pl_plane *plane) { if (!filename) return true; SDL_Surface *img = IMG_Load(filename); if (!img) { fprintf(stderr, "Failed loading '%s': %s\n", filename, SDL_GetError()); return false; } const SDL_PixelFormat *fmt = img->format; if (SDL_ISPIXELFORMAT_INDEXED(fmt->format)) { SDL_Surface *fixed; fixed = SDL_CreateRGBSurfaceWithFormat(0, img->w, img->h, 32, SDL_PIXELFORMAT_ABGR8888); SDL_BlitSurface(img, NULL, fixed, NULL); SDL_FreeSurface(img); img = fixed; fmt = img->format; } struct pl_plane_data data = { .type = PL_FMT_UNORM, .width = img->w, .height = img->h, .pixel_stride = fmt->BytesPerPixel, .row_stride = img->pitch, .pixels = img->pixels, }; uint64_t masks[4] = { fmt->Rmask, fmt->Gmask, fmt->Bmask, fmt->Amask }; pl_plane_data_from_mask(&data, masks); bool ok = pl_upload_plane(vk->gpu, plane, &data); SDL_FreeSurface(img); return ok; } static void init_rendering(const char *img, const char *osd) { if (!upload_plane(img, &img_plane)) { fprintf(stderr, "Failed uploading image plane!\n"); exit(2); } if (!upload_plane(osd, &osd_plane)) fprintf(stderr, "Failed uploading OSD plane.. continuing anyway\n"); // Create a renderer instance renderer = pl_renderer_create(ctx, vk->gpu); } static void render_frame(const struct pl_swapchain_frame *frame) { const struct pl_tex *img = img_plane.texture; struct pl_image image = { .num_planes = 1, .planes = { img_plane }, .repr = pl_color_repr_unknown, .color = pl_color_space_unknown, .width = img->params.w, .height = img->params.h, }; // This seems to be the case for SDL2_image image.repr.alpha = PL_ALPHA_INDEPENDENT; // Use a slightly heavier upscaler struct pl_render_params render_params = pl_render_default_params; render_params.upscaler = &pl_filter_ewa_lanczos; struct pl_render_target target; pl_render_target_from_swapchain(&target, frame); const struct pl_tex *osd = osd_plane.texture; if (osd) { target.num_overlays = 1; target.overlays = &(struct pl_overlay) { .plane = osd_plane, .rect = { 0, 0, osd->params.w, osd->params.h }, .mode = PL_OVERLAY_NORMAL, .repr = image.repr, .color = image.color, }; } if (!pl_render_image(renderer, &image, &target, &render_params)) { fprintf(stderr, "Failed rendering frame!\n"); uninit(); exit(2); } } int main(int argc, char **argv) { if (argc < 2 || argc > 3) { fprintf(stderr, "Usage: ./sdl2 []\n"); return 255; } unsigned int start = SDL_GetTicks(); int ret = 0; init_sdl(); init_placebo(); init_vulkan(); init_rendering(argv[1], argc > 2 ? 
                                       argv[2] : NULL);

    // Resize the window to match the content
    const struct pl_tex *img = img_plane.texture;
    SDL_SetWindowSize(window, img->params.w, img->params.h);

    unsigned int last = SDL_GetTicks(), frames = 0;
    printf("Took %u ms for initialization\n", last - start);

    while (true) {
        SDL_Event evt;
        while (SDL_PollEvent(&evt) == 1) {
            if (evt.type == SDL_QUIT)
                goto cleanup;
        }

        struct pl_swapchain_frame frame;
        bool ok = pl_swapchain_start_frame(swapchain, &frame);
        if (!ok) {
            SDL_Delay(10);
            continue;
        }

        render_frame(&frame);
        ok = pl_swapchain_submit_frame(swapchain);
        if (!ok) {
            fprintf(stderr, "Failed submitting frame!");
            ret = 3;
            goto cleanup;
        }

        pl_swapchain_swap_buffers(swapchain);
        frames++;

        unsigned int now = SDL_GetTicks();
        if (now - last > 5000) {
            printf("%u frames in %u ms = %f FPS\n", frames, now - last,
                   1000.0f * frames / (now - last));
            last = now;
            frames = 0;
        }
    }

cleanup:
    uninit();
    return ret;
}
libplacebo-0.4.0/demos/vulkanlogo.png000066400000000000000000000120711324021332500176010ustar00rootroot00000000000000
libplacebo-0.4.0/etc/000077500000000000000000000000001324021332500143555ustar00rootroot00000000000000
libplacebo-0.4.0/etc/libplacebo-9999.ebuild000066400000000000000000000016601324021332500202630ustar00rootroot00000000000000
# Copyright 1999-2017 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

EAPI=6

if [[ "${PV}" == "9999" ]]; then
	EGIT_REPO_URI="https://github.com/haasn/libplacebo"
	inherit git-r3
else
	KEYWORDS="~amd64"
	SRC_URI="https://github.com/haasn/libplacebo/archive/v${PV}.tar.gz -> ${P}.tar.gz"
fi

inherit meson multilib-minimal

DESCRIPTION="Reusable library
for GPU-accelerated image processing primitives" HOMEPAGE="https://github.com/haasn/libplacebo" LICENSE="LGPLv2.1+" SLOT="0" IUSE="shaderc vulkan" RDEPEND="shaderc? ( dev-util/shaderc[${MULTILIB_USEDEP}] ) vulkan? ( media-libs/vulkan-loader[${MULTILIB_USEDEP}] )" DEPEND="${RDEPEND}" DOCS="README.md" multilib_src_configure() { local emesonargs=( -D shaderc=$(usex shaderc true false) -D vulkan=$(usex vulkan true false) ) meson_src_configure } multilib_src_compile() { eninja } multilib_src_install() { DESTDIR="${D}" eninja install } libplacebo-0.4.0/meson.build000066400000000000000000000001451324021332500157440ustar00rootroot00000000000000project('libplacebo', 'c', license: 'LGPL2.1+', default_options: ['c_std=c99'], ) subdir('src') libplacebo-0.4.0/meson_options.txt000066400000000000000000000007601324021332500172420ustar00rootroot00000000000000# Optional components option('vulkan', type: 'combo', choices: ['auto', 'true', 'false'], description: 'Vulkan-based renderer') option('shaderc', type: 'combo', choices: ['auto', 'true', 'false'], description: 'libshaderc SPIR-V compiler') # Miscellaneous option('tests', type: 'boolean', value: false, description: 'Enable building the test cases') option('bench', type: 'boolean', value: false, description: 'Enable building benchmarks (`meson test benchmark`)') libplacebo-0.4.0/src/000077500000000000000000000000001324021332500143715ustar00rootroot00000000000000libplacebo-0.4.0/src/3rdparty/000077500000000000000000000000001324021332500161415ustar00rootroot00000000000000libplacebo-0.4.0/src/3rdparty/siphash.c000066400000000000000000000073131324021332500177500ustar00rootroot00000000000000/* SipHash reference C implementation Modified for use by libplacebo: - Hard-coded a fixed key (k0 and k1) - Hard-coded the output size to 64 bits - Return the result vector directly Copyright (c) 2012-2016 Jean-Philippe Aumasson Copyright (c) 2012-2014 Daniel J. Bernstein To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. . 
*/ #include "siphash.h" /* default: SipHash-2-4 */ #define cROUNDS 2 #define dROUNDS 4 #define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) #define U8TO64_LE(p) \ (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) #define SIPROUND \ do { \ v0 += v1; \ v1 = ROTL(v1, 13); \ v1 ^= v0; \ v0 = ROTL(v0, 32); \ v2 += v3; \ v3 = ROTL(v3, 16); \ v3 ^= v2; \ v0 += v3; \ v3 = ROTL(v3, 21); \ v3 ^= v0; \ v2 += v1; \ v1 = ROTL(v1, 17); \ v1 ^= v2; \ v2 = ROTL(v2, 32); \ } while (0) uint64_t siphash64(const uint8_t *in, const size_t inlen) { uint64_t v0 = 0x736f6d6570736575ULL; uint64_t v1 = 0x646f72616e646f6dULL; uint64_t v2 = 0x6c7967656e657261ULL; uint64_t v3 = 0x7465646279746573ULL; uint64_t k0 = 0xfe9f075098ddb0faULL; uint64_t k1 = 0x68f7f03510e5285cULL; uint64_t m; int i; const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); const int left = inlen & 7; uint64_t b = ((uint64_t)inlen) << 56; v3 ^= k1; v2 ^= k0; v1 ^= k1; v0 ^= k0; for (; in != end; in += 8) { m = U8TO64_LE(in); v3 ^= m; for (i = 0; i < cROUNDS; ++i) SIPROUND; v0 ^= m; } switch (left) { case 7: b |= ((uint64_t)in[6]) << 48; case 6: b |= ((uint64_t)in[5]) << 40; case 5: b |= ((uint64_t)in[4]) << 32; case 4: b |= ((uint64_t)in[3]) << 24; case 3: b |= ((uint64_t)in[2]) << 16; case 2: b |= ((uint64_t)in[1]) << 8; case 1: b |= ((uint64_t)in[0]); break; case 0: break; } v3 ^= b; for (i = 0; i < cROUNDS; ++i) SIPROUND; v0 ^= b; v2 ^= 0xff; for (i = 0; i < dROUNDS; ++i) SIPROUND; b = v0 ^ v1 ^ v2 ^ v3; return b; } libplacebo-0.4.0/src/3rdparty/siphash.h000066400000000000000000000001621324021332500177500ustar00rootroot00000000000000#pragma once #include #include uint64_t siphash64(const uint8_t *in, const size_t inlen); libplacebo-0.4.0/src/bstr/000077500000000000000000000000001324021332500153435ustar00rootroot00000000000000libplacebo-0.4.0/src/bstr/README000066400000000000000000000005371324021332500162300ustar00rootroot00000000000000bstr ("byte string") is an abstraction for working with strings in a start/length representation rather than scanning for \0 terminators. This is important for both handling arbitrary binary data as well as being more efficient for large string operations. bstr is based on the TA/talloc helpers. All of the code is ported from mpv (https://mpv.io). libplacebo-0.4.0/src/bstr/bstr.c000066400000000000000000000233341324021332500164660ustar00rootroot00000000000000/* * Copyright (c) 2017 the mpv developers * * mpv is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * mpv is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include #include #include #include #include #include "ta/talloc.h" #include "bstr.h" #include "ctype.h" #define MIN(a, b) ((a) > (b) ? (b) : (a)) #define MAX(a, b) ((a) > (b) ? 
(a) : (b)) int bstrcmp(struct bstr str1, struct bstr str2) { int ret = 0; if (str1.len && str2.len) ret = memcmp(str1.start, str2.start, MIN(str1.len, str2.len)); if (!ret) { if (str1.len == str2.len) return 0; else if (str1.len > str2.len) return 1; else return -1; } return ret; } int bstrcasecmp(struct bstr str1, struct bstr str2) { int ret = 0; if (str1.len && str2.len) ret = strncasecmp(str1.start, str2.start, MIN(str1.len, str2.len)); if (!ret) { if (str1.len == str2.len) return 0; else if (str1.len > str2.len) return 1; else return -1; } return ret; } int bstrchr(struct bstr str, int c) { for (int i = 0; i < str.len; i++) if (str.start[i] == c) return i; return -1; } int bstrrchr(struct bstr str, int c) { for (int i = str.len - 1; i >= 0; i--) if (str.start[i] == c) return i; return -1; } int bstrcspn(struct bstr str, const char *reject) { int i; for (i = 0; i < str.len; i++) if (strchr(reject, str.start[i])) break; return i; } int bstrspn(struct bstr str, const char *accept) { int i; for (i = 0; i < str.len; i++) if (!strchr(accept, str.start[i])) break; return i; } int bstr_find(struct bstr haystack, struct bstr needle) { for (int i = 0; i < haystack.len; i++) if (bstr_startswith(bstr_splice(haystack, i, haystack.len), needle)) return i; return -1; } struct bstr bstr_lstrip(struct bstr str) { while (str.len && mp_isspace(*str.start)) { str.start++; str.len--; } return str; } struct bstr bstr_strip(struct bstr str) { str = bstr_lstrip(str); while (str.len && mp_isspace(str.start[str.len - 1])) str.len--; return str; } struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest) { int start; for (start = 0; start < str.len; start++) if (!strchr(sep, str.start[start])) break; str = bstr_cut(str, start); int end = bstrcspn(str, sep); if (rest) { *rest = bstr_cut(str, end); } return bstr_splice(str, 0, end); } // Unlike with bstr_split(), tok is a string, and not a set of char. 
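// (Illustrative example with hypothetical inputs, not taken from the upstream
//  comment: bstr l, r; bstr_split_tok(bstr0("key=value"), "=", &l, &r) returns
//  true and sets l to "key" and r to "value", as described below.)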
// If tok is in str, return true, and: concat(out_left, tok, out_right) == str // Otherwise, return false, and set out_left==str, out_right=="" bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right) { bstr bsep = bstr0(tok); int pos = bstr_find(str, bsep); if (pos < 0) pos = str.len; *out_left = bstr_splice(str, 0, pos); *out_right = bstr_cut(str, pos + bsep.len); return pos != str.len; } struct bstr bstr_splice(struct bstr str, int start, int end) { if (start < 0) start += str.len; if (end < 0) end += str.len; end = MIN(end, str.len); start = MAX(start, 0); end = MAX(end, start); str.start += start; str.len = end - start; return str; } long long bstrtoll(struct bstr str, struct bstr *rest, int base) { str = bstr_lstrip(str); char buf[51]; int len = MIN(str.len, 50); memcpy(buf, str.start, len); buf[len] = 0; char *endptr; long long r = strtoll(buf, &endptr, base); if (rest) *rest = bstr_cut(str, endptr - buf); return r; } double bstrtod(struct bstr str, struct bstr *rest) { str = bstr_lstrip(str); char buf[101]; int len = MIN(str.len, 100); memcpy(buf, str.start, len); buf[len] = 0; char *endptr; double r = strtod(buf, &endptr); if (rest) *rest = bstr_cut(str, endptr - buf); return r; } struct bstr *bstr_splitlines(void *talloc_ctx, struct bstr str) { if (str.len == 0) return NULL; int count = 0; for (int i = 0; i < str.len; i++) if (str.start[i] == '\n') count++; if (str.start[str.len - 1] != '\n') count++; struct bstr *r = talloc_array_ptrtype(talloc_ctx, r, count); unsigned char *p = str.start; for (int i = 0; i < count - 1; i++) { r[i].start = p; while (*p++ != '\n'); r[i].len = p - r[i].start; } r[count - 1].start = p; r[count - 1].len = str.start + str.len - p; return r; } struct bstr bstr_splitchar(struct bstr str, struct bstr *rest, const char c) { int pos = bstrchr(str, c); if (pos < 0) pos = str.len; if (rest) *rest = bstr_cut(str, pos + 1); return bstr_splice(str, 0, pos + 1); } struct bstr bstr_strip_linebreaks(struct bstr str) { if (bstr_endswith0(str, "\r\n")) { str = bstr_splice(str, 0, str.len - 2); } else if (bstr_endswith0(str, "\n")) { str = bstr_splice(str, 0, str.len - 1); } return str; } bool bstr_eatstart(struct bstr *s, struct bstr prefix) { if (!bstr_startswith(*s, prefix)) return false; *s = bstr_cut(*s, prefix.len); return true; } bool bstr_eatend(struct bstr *s, struct bstr prefix) { if (!bstr_endswith(*s, prefix)) return false; s->len -= prefix.len; return true; } void bstr_lower(struct bstr str) { for (int i = 0; i < str.len; i++) str.start[i] = mp_tolower(str.start[i]); } int bstr_sscanf(struct bstr str, const char *format, ...) { char *ptr = bstrdup0(NULL, str); va_list va; va_start(va, format); int ret = vsscanf(ptr, format, va); va_end(va); talloc_free(ptr); return ret; } static void resize_append(void *talloc_ctx, bstr *s, size_t append_min) { size_t size = talloc_get_size(s->start); assert(s->len <= size); if (append_min > size - s->len) { if (append_min < size) append_min = size; // preallocate in power of 2s if (size >= SIZE_MAX / 2 || append_min >= SIZE_MAX / 2) abort(); // oom s->start = talloc_realloc_size(talloc_ctx, s->start, size + append_min); } } // Append the string, so that *s = *s + append. s->start is expected to be // a talloc allocation (which can be realloced) or NULL. // This function will always implicitly append a \0 after the new string for // convenience. // talloc_ctx will be used as parent context, if s->start is NULL. 
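// (Hedged usage sketch with hypothetical inputs and a hypothetical talloc
//  parent ctx: starting from an empty bstr s = {0}, calling
//  bstr_xappend(ctx, &s, bstr0("foo")) and then bstr_xappend(ctx, &s, bstr0("bar"))
//  leaves s.len == 6 and s.start holding "foobar", with the implicit trailing
//  '\0' mentioned above.)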
void bstr_xappend(void *talloc_ctx, bstr *s, bstr append) { if (!append.len) return; resize_append(talloc_ctx, s, append.len + 1); memcpy(s->start + s->len, append.start, append.len); s->len += append.len; s->start[s->len] = '\0'; } void bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...) { va_list ap; va_start(ap, fmt); bstr_xappend_vasprintf(talloc_ctx, s, fmt, ap); va_end(ap); } // Exactly as bstr_xappend(), but with a formatted string. void bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt, va_list ap) { int size; va_list copy; va_copy(copy, ap); size_t avail = talloc_get_size(s->start) - s->len; char *dest = s->start ? s->start + s->len : NULL; char c; if (avail < 1) dest = &c; size = vsnprintf(dest, MAX(avail, 1), fmt, copy); va_end(copy); if (size < 0) abort(); if (avail < 1 || size + 1 > avail) { resize_append(talloc_ctx, s, size + 1); vsnprintf(s->start + s->len, size + 1, fmt, ap); } s->len += size; } bool bstr_case_startswith(struct bstr s, struct bstr prefix) { struct bstr start = bstr_splice(s, 0, prefix.len); return start.len == prefix.len && bstrcasecmp(start, prefix) == 0; } bool bstr_case_endswith(struct bstr s, struct bstr suffix) { struct bstr end = bstr_cut(s, -suffix.len); return end.len == suffix.len && bstrcasecmp(end, suffix) == 0; } struct bstr bstr_strip_ext(struct bstr str) { int dotpos = bstrrchr(str, '.'); if (dotpos < 0) return str; return (struct bstr){str.start, dotpos}; } struct bstr bstr_get_ext(struct bstr s) { int dotpos = bstrrchr(s, '.'); if (dotpos < 0) return (struct bstr){NULL, 0}; return bstr_splice(s, dotpos + 1, s.len); } static int h_to_i(unsigned char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; if (c >= 'A' && c <= 'F') return c - 'A' + 10; return -1; // invalid char } bool bstr_decode_hex(void *talloc_ctx, struct bstr hex, struct bstr *out) { if (!out) return false; char *arr = talloc_array(talloc_ctx, char, hex.len / 2); int len = 0; while (hex.len >= 2) { int a = h_to_i(hex.start[0]); int b = h_to_i(hex.start[1]); hex = bstr_splice(hex, 2, hex.len); if (a < 0 || b < 0) { talloc_free(arr); return false; } arr[len++] = (a << 4) | b; } *out = (struct bstr){ .start = arr, .len = len }; return true; } libplacebo-0.4.0/src/bstr/bstr.h000066400000000000000000000153101324021332500164660ustar00rootroot00000000000000/* * Copyright (c) 2017 the mpv developers * * mpv is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * mpv is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include #include #include #include "osdep/compiler.h" #include "ta/talloc.h" #include "3rdparty/siphash.h" /* NOTE: 'len' is size_t, but most string-handling functions below assume * that input size has been sanity checked and len fits in an int. */ typedef struct bstr { unsigned char *start; size_t len; } bstr; // If str.start is NULL, return NULL. 
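// (Illustrative example with hypothetical inputs: bstrdup0(ctx, bstr0("abc"))
//  returns a newly allocated "abc", while bstrdup0(ctx, bstr0(NULL)) returns
//  NULL, per the comment above.)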
static inline char *bstrdup0(void *talloc_ctx, struct bstr str) { return talloc_strndup(talloc_ctx, (char *)str.start, str.len); } // Like bstrdup0(), but always return a valid C-string. static inline char *bstrto0(void *talloc_ctx, struct bstr str) { return str.start ? bstrdup0(talloc_ctx, str) : talloc_strdup(talloc_ctx, ""); } // Return start = NULL iff that is true for the original. static inline struct bstr bstrdup(void *talloc_ctx, struct bstr str) { struct bstr r = { NULL, str.len }; if (str.start) r.start = (unsigned char *)talloc_memdup(talloc_ctx, str.start, str.len); return r; } static inline struct bstr bstr0(const char *s) { return (struct bstr){(unsigned char *)s, s ? strlen(s) : 0}; } int bstrcmp(struct bstr str1, struct bstr str2); int bstrcasecmp(struct bstr str1, struct bstr str2); int bstrchr(struct bstr str, int c); int bstrrchr(struct bstr str, int c); int bstrspn(struct bstr str, const char *accept); int bstrcspn(struct bstr str, const char *reject); int bstr_find(struct bstr haystack, struct bstr needle); struct bstr *bstr_splitlines(void *talloc_ctx, struct bstr str); struct bstr bstr_lstrip(struct bstr str); struct bstr bstr_strip(struct bstr str); struct bstr bstr_split(struct bstr str, const char *sep, struct bstr *rest); bool bstr_split_tok(bstr str, const char *tok, bstr *out_left, bstr *out_right); struct bstr bstr_splice(struct bstr str, int start, int end); long long bstrtoll(struct bstr str, struct bstr *rest, int base); double bstrtod(struct bstr str, struct bstr *rest); void bstr_lower(struct bstr str); int bstr_sscanf(struct bstr str, const char *format, ...); // Decode a string containing hexadecimal data. All whitespace will be silently // ignored. When successful, this allocates a new array to store the output. bool bstr_decode_hex(void *talloc_ctx, struct bstr hex, struct bstr *out); // Return the text before the occurrence of a character, and return it. Change // *rest to point to the text following this character. (rest can be NULL.) struct bstr bstr_splitchar(struct bstr str, struct bstr *rest, const char c); // Like bstr_splitchar. Trailing newlines are not stripped. static inline struct bstr bstr_getline(struct bstr str, struct bstr *rest) { return bstr_splitchar(str, rest, '\n'); } // Strip one trailing line break. This is intended for use with bstr_getline, // and will remove the trailing \n or \r\n sequence. struct bstr bstr_strip_linebreaks(struct bstr str); void bstr_xappend(void *talloc_ctx, bstr *s, bstr append); void bstr_xappend_asprintf(void *talloc_ctx, bstr *s, const char *fmt, ...) PRINTF_ATTRIBUTE(3, 4); void bstr_xappend_vasprintf(void *talloc_ctx, bstr *s, const char *fmt, va_list va) PRINTF_ATTRIBUTE(3, 0); // Locale-invariant versions of xappend_(v)asprintf // // NOTE: These only support %d, %zu, %f, %c and %s, with no other length // modifiers or combinations. Calling them on an invalid string will abort, so // only use on known format strings! void bstr_xappend_asprintf_c(void *talloc_ctx, bstr *s, const char *fmt, ...) PRINTF_ATTRIBUTE(3, 4); void bstr_xappend_vasprintf_c(void *talloc_ctx, bstr *s, const char *fmt, va_list va) PRINTF_ATTRIBUTE(3, 0); // If s starts/ends with prefix, return true and return the rest of the string // in s. 
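// (Illustrative example with hypothetical inputs: with
//  struct bstr s = bstr0("--verbose"), bstr_eatstart0(&s, "--") returns true
//  and leaves s equal to "verbose"; if the prefix does not match, s is left
//  unchanged and false is returned.)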
bool bstr_eatstart(struct bstr *s, struct bstr prefix); bool bstr_eatend(struct bstr *s, struct bstr prefix); bool bstr_case_startswith(struct bstr s, struct bstr prefix); bool bstr_case_endswith(struct bstr s, struct bstr suffix); struct bstr bstr_strip_ext(struct bstr str); struct bstr bstr_get_ext(struct bstr s); static inline struct bstr bstr_cut(struct bstr str, int n) { if (n < 0) { n += str.len; if (n < 0) n = 0; } if (((size_t)n) > str.len) n = str.len; return (struct bstr){str.start + n, str.len - n}; } static inline bool bstr_startswith(struct bstr str, struct bstr prefix) { if (str.len < prefix.len) return false; return !memcmp(str.start, prefix.start, prefix.len); } static inline bool bstr_startswith0(struct bstr str, const char *prefix) { return bstr_startswith(str, bstr0(prefix)); } static inline bool bstr_endswith(struct bstr str, struct bstr suffix) { if (str.len < suffix.len) return false; return !memcmp(str.start + str.len - suffix.len, suffix.start, suffix.len); } static inline bool bstr_endswith0(struct bstr str, const char *suffix) { return bstr_endswith(str, bstr0(suffix)); } static inline int bstrcmp0(struct bstr str1, const char *str2) { return bstrcmp(str1, bstr0(str2)); } static inline bool bstr_equals(struct bstr str1, struct bstr str2) { if (str1.len != str2.len) return false; return str1.start == str2.start || bstrcmp(str1, str2) == 0; } static inline bool bstr_equals0(struct bstr str1, const char *str2) { return bstr_equals(str1, bstr0(str2)); } static inline int bstrcasecmp0(struct bstr str1, const char *str2) { return bstrcasecmp(str1, bstr0(str2)); } static inline int bstr_find0(struct bstr haystack, const char *needle) { return bstr_find(haystack, bstr0(needle)); } static inline bool bstr_eatstart0(struct bstr *s, const char *prefix) { return bstr_eatstart(s, bstr0(prefix)); } static inline bool bstr_eatend0(struct bstr *s, const char *prefix) { return bstr_eatend(s, bstr0(prefix)); } static inline uint64_t bstr_hash64(struct bstr str) { return siphash64(str.start, str.len); } // create a pair (not single value!) for "%.*s" printf syntax #define BSTR_P(bstr) (int)((bstr).len), ((bstr).start ? (char*)(bstr).start : "") #define WHITESPACE " \f\n\r\t\v" libplacebo-0.4.0/src/bstr/ctype.h000066400000000000000000000016501324021332500166420ustar00rootroot00000000000000#pragma once // Roughly follows C semantics, but doesn't account for EOF, allows char as // parameter, and is locale independent (always uses "C" locale). static inline int mp_isprint(char c) { return (unsigned char)c >= 32; } static inline int mp_isspace(char c) { return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c =='\v'; } static inline int mp_isupper(char c) { return c >= 'A' && c <= 'Z'; } static inline int mp_islower(char c) { return c >= 'a' && c <= 'z'; } static inline int mp_isdigit(char c) { return c >= '0' && c <= '9'; } static inline int mp_isalpha(char c) { return mp_isupper(c) || mp_islower(c); } static inline int mp_isalnum(char c) { return mp_isalpha(c) || mp_isdigit(c); } static inline char mp_tolower(char c) { return mp_isupper(c) ? c - 'A' + 'a' : c; } static inline char mp_toupper(char c) { return mp_islower(c) ? c - 'a' + 'A' : c; } libplacebo-0.4.0/src/bstr/format.c000066400000000000000000000255351324021332500170110ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include #include #include "bstr.h" int ccStrPrintInt32( char *str, int32_t n ); int ccStrPrintUint32( char *str, uint32_t n ); int ccStrPrintInt64( char *str, int64_t n ); int ccStrPrintUint64( char *str, uint64_t n ); int ccStrPrintDouble( char *str, int bufsize, int decimals, double value ); #define CC_STR_PRINT_BUFSIZE_INT32 (12) #define CC_STR_PRINT_BUFSIZE_UINT32 (11) #define CC_STR_PRINT_BUFSIZE_INT64 (21) #define CC_STR_PRINT_BUFSIZE_UINT64 (20) void bstr_xappend_asprintf_c(void *tactx, bstr *s, const char *fmt, ...) { va_list ap; va_start(ap, fmt); bstr_xappend_vasprintf_c(tactx, s, fmt, ap); va_end(ap); } void bstr_xappend_vasprintf_c(void *tactx, bstr *s, const char *fmt, va_list ap) { for (const char *c; (c = strchr(fmt, '%')) != NULL; fmt = c + 1) { // Append the preceding string literal bstr_xappend(tactx, s, (struct bstr) { (char *) fmt, c - fmt }); c++; // skip '%' char buf[32]; int len; // The format character follows the % sign switch (c[0]) { case '%': bstr_xappend(tactx, s, bstr0("%")); continue; case 'c': buf[0] = (char) va_arg(ap, int); bstr_xappend(tactx, s, (struct bstr) { buf, 1 }); continue; case 's': { const char *arg = va_arg(ap, void *); bstr_xappend(tactx, s, bstr0(arg)); continue; } case 'd': len = ccStrPrintInt32(buf, va_arg(ap, int)); bstr_xappend(tactx, s, (struct bstr) { buf, len }); continue; case 'z': assert(c[1] == 'u'); len = ccStrPrintUint64(buf, va_arg(ap, size_t)); bstr_xappend(tactx, s, (struct bstr) { buf, len }); c++; continue; case 'f': len = ccStrPrintDouble(buf, sizeof(buf), 6, va_arg(ap, double)); bstr_xappend(tactx, s, (struct bstr) { buf, len }); continue; default: fprintf(stderr, "Invalid conversion character: '%c'!\n", c[0]); abort(); } } // Append the remaining string literal bstr_xappend(tactx, s, bstr0(fmt)); } /* ***************************************************************************** * * Copyright (c) 2007-2016 Alexis Naveros. * Modified for use with libplacebo by Niklas Haas * Changes include: * - Removed a CC_MIN macro dependency by equivalent logic * - Removed CC_ALWAYSINLINE * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: * * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software * in a product, an acknowledgment in the product documentation would be * appreciated but is not required. * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. * 3. This notice may not be removed or altered from any source distribution. 
* * ----------------------------------------------------------------------------- */ static const char ccStrPrintDecimalTable[201] = { "00010203040506070809" "10111213141516171819" "20212223242526272829" "30313233343536373839" "40414243444546474849" "50515253545556575859" "60616263646566676869" "70717273747576777879" "80818283848586878889" "90919293949596979899" }; static inline int ccStrPrintLength32( uint32_t n ) { int size; if( n >= 10000 ) { if( n >= 10000000 ) { if( n >= 1000000000 ) size = 10; else if( n >= 100000000 ) size = 9; else size = 8; } else if( n >= 1000000 ) size = 7; else if( n >= 100000 ) size = 6; else size = 5; } else { if( n >= 100 ) { if( n >= 1000 ) size = 4; else size = 3; } else if( n >= 10 ) size = 2; else size = 1; } return size; } static inline int ccStrPrintLength64( uint64_t n ) { int size; if( n >= 10000 ) { if( n >= 10000000 ) { if( n >= 10000000000LL ) { if( n >= 10000000000000LL ) { if( n >= 10000000000000000LL ) { if( n >= 10000000000000000000ULL ) size = 20; else if( n >= 1000000000000000000LL ) size = 19; else if( n >= 100000000000000000LL ) size = 18; else size = 17; } else if( n >= 1000000000000000LL ) size = 16; else if( n >= 100000000000000LL ) size = 15; else size = 14; } else if( n >= 1000000000000LL ) size = 13; else if( n >= 100000000000LL ) size = 12; else size = 11; } else if( n >= 1000000000 ) size = 10; else if( n >= 100000000 ) size = 9; else size = 8; } else { if( n >= 1000000 ) size = 7; else if( n >= 100000 ) size = 6; else size = 5; } } else if( n >= 100 ) { if( n >= 1000 ) size = 4; else size = 3; } else if( n >= 10 ) size = 2; else size = 1; return size; } int ccStrPrintInt32( char *str, int32_t n ) { int sign, size, retsize, pos; uint32_t val32; const char *src; if( n == 0 ) { str[0] = '0'; str[1] = 0; return 1; } sign = -( n < 0 ); val32 = ( n ^ sign ) - sign; size = ccStrPrintLength32( val32 ); if( sign ) { size++; str[0] = '-'; } retsize = size; str[size] = 0; str += size - 1; while( val32 >= 100 ) { pos = val32 % 100; val32 /= 100; src = &ccStrPrintDecimalTable[ pos << 1 ]; str[-1] = src[0]; str[0] = src[1]; str -= 2; } while( val32 > 0 ) { *str-- = '0' + ( val32 % 10 ); val32 /= 10; } return retsize; } int ccStrPrintUint32( char *str, uint32_t n ) { int size, retsize, pos; uint32_t val32; const char *src; if( n == 0 ) { str[0] = '0'; str[1] = 0; return 1; } val32 = n; size = ccStrPrintLength32( val32 ); retsize = size; str[size] = 0; str += size - 1; while( val32 >= 100 ) { pos = val32 % 100; val32 /= 100; src = &ccStrPrintDecimalTable[ pos << 1 ]; str[-1] = src[0]; str[0] = src[1]; str -= 2; } while( val32 > 0 ) { *str-- = '0' + ( val32 % 10 ); val32 /= 10; } return retsize; } int ccStrPrintInt64( char *str, int64_t n ) { int sign, size, retsize, pos; uint64_t val64; const char *src; if( n == 0 ) { str[0] = '0'; str[1] = 0; return 1; } sign = -( n < 0 ); val64 = ( n ^ sign ) - sign; size = ccStrPrintLength64( val64 ); if( sign ) { size++; str[0] = '-'; } retsize = size; str[size] = 0; str += size - 1; while( val64 >= 100 ) { pos = val64 % 100; val64 /= 100; src = &ccStrPrintDecimalTable[ pos << 1 ]; str[-1] = src[0]; str[0] = src[1]; str -= 2; } while( val64 > 0 ) { *str-- = '0' + ( val64 % 10 ); val64 /= 10; } return retsize; } int ccStrPrintUint64( char *str, uint64_t n ) { int size, retsize, pos; uint64_t val64; const char *src; if( n == 0 ) { str[0] = '0'; str[1] = 0; return 1; } val64 = n; size = ccStrPrintLength64( val64 ); retsize = size; str[size] = 0; str += size - 1; while( val64 >= 100 ) { pos = val64 % 100; val64 
/= 100; src = &ccStrPrintDecimalTable[ pos << 1 ]; str[-1] = src[0]; str[0] = src[1]; str -= 2; } while( val64 > 0 ) { *str-- = '0' + ( val64 % 10 ); val64 /= 10; } return retsize; } #define CC_STR_PRINT_DOUBLE_MAX_DECIMAL (24) static const double ccStrPrintBiasTable[CC_STR_PRINT_DOUBLE_MAX_DECIMAL+1] = { 0.5, 0.05, 0.005, 0.0005, 0.00005, 0.000005, 0.0000005, 0.00000005, 0.000000005, 0.0000000005, 0.00000000005, 0.000000000005, 0.0000000000005, 0.00000000000005, 0.000000000000005, 0.0000000000000005, 0.00000000000000005, 0.000000000000000005, 0.0000000000000000005, 0.00000000000000000005, 0.000000000000000000005, 0.0000000000000000000005, 0.00000000000000000000005, 0.000000000000000000000005, 0.0000000000000000000000005 }; int ccStrPrintDouble( char *str, int bufsize, int decimals, double value ) { int size, offset, index; int32_t frac, accumsub; double muldec; uint32_t u32; uint64_t u64; size = 0; if( value < 0.0 ) { size = 1; *str++ = '-'; bufsize--; value = -value; } /* Add bias matching the count of desired decimals in order to round the right way */ if( decimals > CC_STR_PRINT_DOUBLE_MAX_DECIMAL ) decimals = CC_STR_PRINT_DOUBLE_MAX_DECIMAL; value += ccStrPrintBiasTable[decimals]; if( value < 4294967296.0 ) { if( bufsize < CC_STR_PRINT_BUFSIZE_UINT32 ) goto error; u32 = (int32_t)value; offset = ccStrPrintUint32( str, u32 ); size += offset; bufsize -= size; value -= (double)u32; } else if( value < 18446744073709551616.0 ) { if( bufsize < CC_STR_PRINT_BUFSIZE_UINT64 ) goto error; u64 = (int64_t)value; offset = ccStrPrintUint64( str, u64 ); size += offset; bufsize -= size; value -= (double)u64; } else goto error; if (decimals > bufsize - 2) decimals = bufsize - 2; if( decimals <= 0 ) return size; str[offset] = '.'; muldec = 10.0; accumsub = 0; str += offset + 1; for( index = 0 ; index < decimals ; index++ ) { frac = (int32_t)( value * muldec ) - accumsub; str[index] = '0' + (char)frac; accumsub += frac; accumsub = ( accumsub << 3 ) + ( accumsub << 1 ); if( muldec < 10000000 ) muldec *= 10.0; else { value *= 10000000.0; value -= (int32_t)value; muldec = 10.0; accumsub = 0; } } str[ index ] = 0; size += index + 1; return size; error: if( bufsize < 4 ) *str = 0; else { str[0] = 'E'; str[1] = 'R'; str[2] = 'R'; str[3] = 0; } return 0; } libplacebo-0.4.0/src/colorspace.c000066400000000000000000000540471324021332500167010ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "common.h" bool pl_color_system_is_ycbcr_like(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_XYZ: return false; case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_YCGCO: return true; default: abort(); }; } bool pl_color_system_is_linear(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_YCGCO: return true; case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_XYZ: return false; default: abort(); }; } enum pl_color_system pl_color_system_guess_ycbcr(int width, int height) { if (width >= 1280 || height > 576) { // Typical HD content return PL_COLOR_SYSTEM_BT_709; } else { // Typical SD content return PL_COLOR_SYSTEM_BT_601; } } bool pl_bit_encoding_equal(const struct pl_bit_encoding *b1, const struct pl_bit_encoding *b2) { return b1->sample_depth == b2->sample_depth && b1->color_depth == b2->color_depth && b1->bit_shift == b2->bit_shift; } const struct pl_color_repr pl_color_repr_unknown = {0}; const struct pl_color_repr pl_color_repr_rgb = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_PC, }; const struct pl_color_repr pl_color_repr_sdtv = { .sys = PL_COLOR_SYSTEM_BT_601, .levels = PL_COLOR_LEVELS_TV, }; const struct pl_color_repr pl_color_repr_hdtv = { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_TV, }; const struct pl_color_repr pl_color_repr_uhdtv = { .sys = PL_COLOR_SYSTEM_BT_2020_NC, .levels = PL_COLOR_LEVELS_TV, }; const struct pl_color_repr pl_color_repr_jpeg = { .sys = PL_COLOR_SYSTEM_BT_601, .levels = PL_COLOR_LEVELS_PC, }; bool pl_color_repr_equal(const struct pl_color_repr *c1, const struct pl_color_repr *c2) { return c1->sys == c2->sys && c1->levels == c2->levels && c1->alpha == c2->alpha && pl_bit_encoding_equal(&c1->bits, &c2->bits); } static struct pl_bit_encoding pl_bit_encoding_merge(const struct pl_bit_encoding *orig, const struct pl_bit_encoding *new) { return (struct pl_bit_encoding) { .sample_depth = PL_DEF(orig->sample_depth, new->sample_depth), .color_depth = PL_DEF(orig->color_depth, new->color_depth), .bit_shift = PL_DEF(orig->bit_shift, new->bit_shift), }; } void pl_color_repr_merge(struct pl_color_repr *orig, const struct pl_color_repr *new) { *orig = (struct pl_color_repr) { .sys = PL_DEF(orig->sys, new->sys), .levels = PL_DEF(orig->levels, new->levels), .alpha = PL_DEF(orig->alpha, new->alpha), .bits = pl_bit_encoding_merge(&orig->bits, &new->bits), }; } static enum pl_color_levels guess_levels(const struct pl_color_repr *repr) { if (repr->levels) return repr->levels; return pl_color_system_is_ycbcr_like(repr->sys) ? PL_COLOR_LEVELS_TV : PL_COLOR_LEVELS_PC; } float pl_color_repr_normalize(struct pl_color_repr *repr) { float scale = 1.0; struct pl_bit_encoding *bits = &repr->bits; if (bits->bit_shift) { scale /= (1LL << bits->bit_shift); bits->bit_shift = 0; } int tex_bits = PL_DEF(bits->sample_depth, 8); int col_bits = PL_DEF(bits->color_depth, 8); if (guess_levels(repr) == PL_COLOR_LEVELS_TV) { // Limit range is always shifted directly scale *= (float) (1LL << tex_bits) / (1LL << col_bits); } else { // Full range always uses the full range available scale *= ((1LL << tex_bits) - 1.) 
/ ((1LL << col_bits) - 1.); } bits->sample_depth = bits->color_depth; return scale; } bool pl_color_primaries_is_wide_gamut(enum pl_color_primaries prim) { switch (prim) { case PL_COLOR_PRIM_UNKNOWN: case PL_COLOR_PRIM_BT_601_525: case PL_COLOR_PRIM_BT_601_625: case PL_COLOR_PRIM_BT_709: case PL_COLOR_PRIM_BT_470M: return false; case PL_COLOR_PRIM_BT_2020: case PL_COLOR_PRIM_APPLE: case PL_COLOR_PRIM_ADOBE: case PL_COLOR_PRIM_PRO_PHOTO: case PL_COLOR_PRIM_CIE_1931: case PL_COLOR_PRIM_DCI_P3: case PL_COLOR_PRIM_V_GAMUT: case PL_COLOR_PRIM_S_GAMUT: return true; default: abort(); } } enum pl_color_primaries pl_color_primaries_guess(int width, int height) { // HD content if (width >= 1280 || height > 576) return PL_COLOR_PRIM_BT_709; switch (height) { case 576: // Typical PAL content, including anamorphic/squared return PL_COLOR_PRIM_BT_601_625; case 480: // Typical NTSC content, including squared case 486: // NTSC Pro or anamorphic NTSC return PL_COLOR_PRIM_BT_601_525; default: // No good metric, just pick BT.709 to minimize damage return PL_COLOR_PRIM_BT_709; } } float pl_color_transfer_nominal_peak(enum pl_color_transfer trc) { switch (trc) { case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_BT_1886: case PL_COLOR_TRC_SRGB: case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_GAMMA18: case PL_COLOR_TRC_GAMMA22: case PL_COLOR_TRC_GAMMA28: case PL_COLOR_TRC_PRO_PHOTO: return 1.0; case PL_COLOR_TRC_PQ: return 10000.0 / PL_COLOR_REF_WHITE; case PL_COLOR_TRC_HLG: return 12.0; case PL_COLOR_TRC_V_LOG: return 46.0855; case PL_COLOR_TRC_S_LOG1: return 6.52; case PL_COLOR_TRC_S_LOG2: return 9.212; default: abort(); } } bool pl_color_light_is_scene_referred(enum pl_color_light light) { switch (light) { case PL_COLOR_LIGHT_UNKNOWN: case PL_COLOR_LIGHT_DISPLAY: return false; case PL_COLOR_LIGHT_SCENE_HLG: case PL_COLOR_LIGHT_SCENE_709_1886: case PL_COLOR_LIGHT_SCENE_1_2: return true; default: abort(); } } const struct pl_color_space pl_color_space_unknown = {0}; const struct pl_color_space pl_color_space_srgb = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_SRGB, .light = PL_COLOR_LIGHT_DISPLAY, }; const struct pl_color_space pl_color_space_bt709 = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_BT_1886, .light = PL_COLOR_LIGHT_DISPLAY, }; const struct pl_color_space pl_color_space_hdr10 = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_PQ, .light = PL_COLOR_LIGHT_DISPLAY, }; const struct pl_color_space pl_color_space_bt2020_hlg = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_HLG, .light = PL_COLOR_LIGHT_SCENE_HLG, }; const struct pl_color_space pl_color_space_monitor = { .primaries = PL_COLOR_PRIM_BT_709, // sRGB primaries .transfer = PL_COLOR_TRC_GAMMA22, // typical response .light = PL_COLOR_LIGHT_DISPLAY, }; void pl_color_space_merge(struct pl_color_space *orig, const struct pl_color_space *new) { if (!orig->primaries) orig->primaries = new->primaries; if (!orig->transfer) orig->transfer = new->transfer; if (!orig->light) orig->light = new->light; if (!orig->sig_peak) orig->sig_peak = new->sig_peak; if (!orig->sig_avg) orig->sig_avg = new->sig_avg; } bool pl_color_space_equal(struct pl_color_space c1, struct pl_color_space c2) { return c1.primaries == c2.primaries && c1.transfer == c2.transfer && c1.light == c2.light && c1.sig_peak == c2.sig_peak && c1.sig_avg == c2.sig_avg; } const struct pl_color_adjustment pl_color_adjustment_neutral = { .brightness = 0.0, .contrast = 1.0, .saturation = 1.0, .hue = 0.0, .gamma = 1.0, }; void 
pl_chroma_location_offset(enum pl_chroma_location loc, float *x, float *y) { switch (loc) { case PL_CHROMA_UNKNOWN: case PL_CHROMA_CENTER: *x = 0; *y = 0; return; case PL_CHROMA_LEFT: *x = -0.5; *y = 0; return; default: abort(); } } const struct pl_raw_primaries *pl_raw_primaries_get(enum pl_color_primaries prim) { /* Values from: ITU-R Recommendations BT.470-6, BT.601-7, BT.709-5, BT.2020-0 https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-6-199811-S!!PDF-E.pdf https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.601-7-201103-I!!PDF-E.pdf https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.709-5-200204-I!!PDF-E.pdf https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2020-0-201208-I!!PDF-E.pdf Other colorspaces from https://en.wikipedia.org/wiki/RGB_color_space#Specifications */ // CIE standard illuminant series #define CIE_D50 {0.34577, 0.35850} #define CIE_D65 {0.31271, 0.32902} #define CIE_C {0.31006, 0.31616} #define CIE_E {1.0/3.0, 1.0/3.0} static const struct pl_raw_primaries primaries[] = { [PL_COLOR_PRIM_BT_470M] = { .red = {0.670, 0.330}, .green = {0.210, 0.710}, .blue = {0.140, 0.080}, .white = CIE_C }, [PL_COLOR_PRIM_BT_601_525] = { .red = {0.630, 0.340}, .green = {0.310, 0.595}, .blue = {0.155, 0.070}, .white = CIE_D65 }, [PL_COLOR_PRIM_BT_601_625] = { .red = {0.640, 0.330}, .green = {0.290, 0.600}, .blue = {0.150, 0.060}, .white = CIE_D65 }, [PL_COLOR_PRIM_BT_709] = { .red = {0.640, 0.330}, .green = {0.300, 0.600}, .blue = {0.150, 0.060}, .white = CIE_D65 }, [PL_COLOR_PRIM_BT_2020] = { .red = {0.708, 0.292}, .green = {0.170, 0.797}, .blue = {0.131, 0.046}, .white = CIE_D65 }, [PL_COLOR_PRIM_APPLE] = { .red = {0.625, 0.340}, .green = {0.280, 0.595}, .blue = {0.115, 0.070}, .white = CIE_D65 }, [PL_COLOR_PRIM_ADOBE] = { .red = {0.640, 0.330}, .green = {0.210, 0.710}, .blue = {0.150, 0.060}, .white = CIE_D65 }, [PL_COLOR_PRIM_PRO_PHOTO] = { .red = {0.7347, 0.2653}, .green = {0.1596, 0.8404}, .blue = {0.0366, 0.0001}, .white = CIE_D50 }, [PL_COLOR_PRIM_CIE_1931] = { .red = {0.7347, 0.2653}, .green = {0.2738, 0.7174}, .blue = {0.1666, 0.0089}, .white = CIE_E }, // From SMPTE RP 431-2 [PL_COLOR_PRIM_DCI_P3] = { .red = {0.680, 0.320}, .green = {0.265, 0.690}, .blue = {0.150, 0.060}, .white = CIE_D65 }, // From Panasonic VARICAM reference manual [PL_COLOR_PRIM_V_GAMUT] = { .red = {0.730, 0.280}, .green = {0.165, 0.840}, .blue = {0.100, -0.03}, .white = CIE_D65 }, // From Sony S-Log reference manual [PL_COLOR_PRIM_S_GAMUT] = { .red = {0.730, 0.280}, .green = {0.140, 0.855}, .blue = {0.100, -0.05}, .white = CIE_D65 }, }; // This is the default assumption if no colorspace information could // be determined, eg. for files which have no video channel. if (!prim) prim = PL_COLOR_PRIM_BT_709; pl_assert(prim < PL_ARRAY_SIZE(primaries)); return &primaries[prim]; } // Compute the RGB/XYZ matrix as described here: // http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html struct pl_matrix3x3 pl_get_rgb2xyz_matrix(const struct pl_raw_primaries *prim) { struct pl_matrix3x3 out = {{{0}}}; float S[3], X[4], Z[4]; // Convert from CIE xyY to XYZ. 
Note that Y=1 holds true for all primaries X[0] = prim->red.x / prim->red.y; X[1] = prim->green.x / prim->green.y; X[2] = prim->blue.x / prim->blue.y; X[3] = prim->white.x / prim->white.y; Z[0] = (1 - prim->red.x - prim->red.y) / prim->red.y; Z[1] = (1 - prim->green.x - prim->green.y) / prim->green.y; Z[2] = (1 - prim->blue.x - prim->blue.y) / prim->blue.y; Z[3] = (1 - prim->white.x - prim->white.y) / prim->white.y; // S = XYZ^-1 * W for (int i = 0; i < 3; i++) { out.m[0][i] = X[i]; out.m[1][i] = 1; out.m[2][i] = Z[i]; } pl_matrix3x3_invert(&out); for (int i = 0; i < 3; i++) S[i] = out.m[i][0] * X[3] + out.m[i][1] * 1 + out.m[i][2] * Z[3]; // M = [Sc * XYZc] for (int i = 0; i < 3; i++) { out.m[0][i] = S[i] * X[i]; out.m[1][i] = S[i] * 1; out.m[2][i] = S[i] * Z[i]; } return out; } struct pl_matrix3x3 pl_get_xyz2rgb_matrix(const struct pl_raw_primaries *prim) { // For simplicity, just invert the rgb2xyz matrix struct pl_matrix3x3 out = pl_get_rgb2xyz_matrix(prim); pl_matrix3x3_invert(&out); return out; } // M := M * XYZd<-XYZs static void apply_chromatic_adaptation(struct pl_cie_xy src, struct pl_cie_xy dest, struct pl_matrix3x3 *mat) { // If the white points are nearly identical, this is a wasteful identity // operation. if (fabs(src.x - dest.x) < 1e-6 && fabs(src.y - dest.y) < 1e-6) return; // XYZd<-XYZs = Ma^-1 * (I*[Cd/Cs]) * Ma // http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html float C[3][2]; // Ma = Bradford matrix, arguably most popular method in use today. // This is derived experimentally and thus hard-coded. struct pl_matrix3x3 bradford = {{ { 0.8951, 0.2664, -0.1614 }, { -0.7502, 1.7135, 0.0367 }, { 0.0389, -0.0685, 1.0296 }, }}; for (int i = 0; i < 3; i++) { // source cone C[i][0] = bradford.m[i][0] * pl_cie_X(src) + bradford.m[i][1] * 1 + bradford.m[i][2] * pl_cie_Z(src); // dest cone C[i][1] = bradford.m[i][0] * pl_cie_X(dest) + bradford.m[i][1] * 1 + bradford.m[i][2] * pl_cie_Z(dest); } // tmp := I * [Cd/Cs] * Ma struct pl_matrix3x3 tmp = {0}; for (int i = 0; i < 3; i++) tmp.m[i][i] = C[i][1] / C[i][0]; pl_matrix3x3_mul(&tmp, &bradford); // M := M * Ma^-1 * tmp pl_matrix3x3_invert(&bradford); pl_matrix3x3_mul(mat, &bradford); pl_matrix3x3_mul(mat, &tmp); } struct pl_matrix3x3 pl_get_color_mapping_matrix(const struct pl_raw_primaries *src, const struct pl_raw_primaries *dst, enum pl_rendering_intent intent) { // In saturation mapping, we don't care about accuracy and just want // primaries to map to primaries, making this an identity transformation. if (intent == PL_INTENT_SATURATION) return pl_matrix3x3_identity; // RGBd<-RGBs = RGBd<-XYZd * XYZd<-XYZs * XYZs<-RGBs // Equations from: http://www.brucelindbloom.com/index.html?Math.html // Note: Perceptual is treated like relative colorimetric. There's no // definition for perceptual other than "make it look good". // RGBd<-XYZd matrix struct pl_matrix3x3 xyz2rgb_d = pl_get_xyz2rgb_matrix(dst); // Chromatic adaptation, except in absolute colorimetric intent if (intent != PL_INTENT_ABSOLUTE_COLORIMETRIC) apply_chromatic_adaptation(src->white, dst->white, &xyz2rgb_d); // XYZs<-RGBs struct pl_matrix3x3 rgb2xyz_s = pl_get_rgb2xyz_matrix(src); pl_matrix3x3_mul(&xyz2rgb_d, &rgb2xyz_s); return xyz2rgb_d; } /* Fill in the Y, U, V vectors of a yuv-to-rgb conversion matrix * based on the given luma weights of the R, G and B components (lr, lg, lb). * lr+lg+lb is assumed to equal 1. 
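 * For example, using the luma weights that appear in pl_color_repr_decode()
 * below: BT.709 uses lr = 0.2126, lg = 0.7152, lb = 0.0722, and BT.601 uses
 * lr = 0.2990, lg = 0.5870, lb = 0.1140; each triple sums to 1 as required.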
* This function is meant for colorspaces satisfying the following * conditions (which are true for common YUV colorspaces): * - The mapping from input [Y, U, V] to output [R, G, B] is linear. * - Y is the vector [1, 1, 1]. (meaning input Y component maps to 1R+1G+1B) * - U maps to a value with zero R and positive B ([0, x, y], y > 0; * i.e. blue and green only). * - V maps to a value with zero B and positive R ([x, y, 0], x > 0; * i.e. red and green only). * - U and V are orthogonal to the luma vector [lr, lg, lb]. * - The magnitudes of the vectors U and V are the minimal ones for which * the image of the set Y=[0...1],U=[-0.5...0.5],V=[-0.5...0.5] under the * conversion function will cover the set R=[0...1],G=[0...1],B=[0...1] * (the resulting matrix can be converted for other input/output ranges * outside this function). * Under these conditions the given parameters lr, lg, lb uniquely * determine the mapping of Y, U, V to R, G, B. */ static struct pl_matrix3x3 luma_coeffs(float lr, float lg, float lb) { pl_assert(fabs(lr+lg+lb - 1) < 1e-6); return (struct pl_matrix3x3) {{ {1, 0, 2 * (1-lr) }, {1, -2 * (1-lb) * lb/lg, -2 * (1-lr) * lr/lg }, {1, 2 * (1-lb), 0 }, }}; } struct pl_transform3x3 pl_color_repr_decode(struct pl_color_repr *repr, const struct pl_color_adjustment *params) { params = PL_DEF(params, &pl_color_adjustment_neutral); struct pl_matrix3x3 m; switch (repr->sys) { case PL_COLOR_SYSTEM_BT_709: m = luma_coeffs(0.2126, 0.7152, 0.0722); break; case PL_COLOR_SYSTEM_BT_601: m = luma_coeffs(0.2990, 0.5870, 0.1140); break; case PL_COLOR_SYSTEM_SMPTE_240M: m = luma_coeffs(0.2122, 0.7013, 0.0865); break; case PL_COLOR_SYSTEM_BT_2020_NC: m = luma_coeffs(0.2627, 0.6780, 0.0593); break; case PL_COLOR_SYSTEM_BT_2020_C: // Note: This outputs into the [-0.5,0.5] range for chroma information. m = (struct pl_matrix3x3) {{ {0, 0, 1}, {1, 0, 0}, {0, 1, 0} }}; break; case PL_COLOR_SYSTEM_YCGCO: m = (struct pl_matrix3x3) {{ {1, -1, 1}, {1, 1, 0}, {1, -1, -1}, }}; break; case PL_COLOR_SYSTEM_UNKNOWN: // fall through case PL_COLOR_SYSTEM_RGB: m = pl_matrix3x3_identity; break; case PL_COLOR_SYSTEM_XYZ: { // For lack of anything saner to do, just assume the caller wants // BT.709 primaries, which is a reasonable assumption. m = pl_get_xyz2rgb_matrix(pl_raw_primaries_get(PL_COLOR_PRIM_BT_709)); } break; default: abort(); } struct pl_transform3x3 out = { .mat = m }; // Apply hue and saturation in the correct way depending on the colorspace. if (pl_color_system_is_ycbcr_like(repr->sys)) { // Hue is equivalent to rotating input [U, V] subvector around the origin. // Saturation scales [U, V]. float huecos = params->saturation * cos(params->hue); float huesin = params->saturation * sin(params->hue); for (int i = 0; i < 3; i++) { float u = out.mat.m[i][1], v = out.mat.m[i][2]; out.mat.m[i][1] = huecos * u - huesin * v; out.mat.m[i][2] = huesin * u + huecos * v; } } // FIXME: apply saturation for RGB int bit_depth = PL_DEF(repr->bits.sample_depth, PL_DEF(repr->bits.color_depth, 8)); double ymax, ymin, cmax, cmid; double scale = (1LL << bit_depth) / ((1LL << bit_depth) - 1.0); switch (guess_levels(repr)) { case PL_COLOR_LEVELS_TV: { ymax = 235 / 256. * scale; ymin = 16 / 256. * scale; cmax = 240 / 256. * scale; cmid = 128 / 256. * scale; break; } case PL_COLOR_LEVELS_PC: // Note: For full-range YUV, there are multiple, subtly inconsistent // standards. So just pick the sanest implementation, which is to // assume MAX_INT == 1.0. ymax = 1.0; ymin = 0.0; cmax = 1.0; cmid = 128 / 256. 
* scale; // *not* exactly 0.5 break; default: abort(); } double ymul = 1.0 / (ymax - ymin); double cmul = 0.5 / (cmax - cmid); double mul[3] = { ymul, ymul, ymul }; double black[3] = { ymin, ymin, ymin }; if (pl_color_system_is_ycbcr_like(repr->sys)) { mul[1] = mul[2] = cmul; black[1] = black[2] = cmid; } // Contrast scales the output value range (gain) // Brightness scales the constant output bias (black lift/boost) for (int i = 0; i < 3; i++) { mul[i] *= params->contrast; out.c[i] += params->brightness; } // Multiply in the texture multiplier and adjust `c` so that black[j] keeps // on mapping to RGB=0 (black to black) for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { out.mat.m[i][j] *= mul[j]; out.c[i] -= out.mat.m[i][j] * black[j]; } } // Finally, multiply in the scaling factor required to get the color up to // the correct representation. pl_matrix3x3_scale(&out.mat, pl_color_repr_normalize(repr)); // Update the metadata to reflect the change. repr->sys = PL_COLOR_SYSTEM_RGB; repr->levels = PL_COLOR_LEVELS_PC; return out; } libplacebo-0.4.0/src/common.c000066400000000000000000000121371324021332500160310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" void pl_rect2d_normalize(struct pl_rect2d *rc) { *rc = (struct pl_rect2d) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), }; } void pl_rect3d_normalize(struct pl_rect3d *rc) { *rc = (struct pl_rect3d) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), .z0 = PL_MIN(rc->z0, rc->z1), .z1 = PL_MAX(rc->z0, rc->z1), }; } const struct pl_matrix3x3 pl_matrix3x3_identity = {{ { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, }}; void pl_matrix3x3_apply(const struct pl_matrix3x3 *mat, float vec[3]) { float x = vec[0], y = vec[1], z = vec[2]; for (int i = 0; i < 3; i++) vec[i] = mat->m[i][0] * x + mat->m[i][1] * y + mat->m[i][2] * z; } void pl_matrix3x3_scale(struct pl_matrix3x3 *mat, float scale) { for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) mat->m[i][j] *= scale; } } void pl_matrix3x3_invert(struct pl_matrix3x3 *mat) { float m00 = mat->m[0][0], m01 = mat->m[0][1], m02 = mat->m[0][2], m10 = mat->m[1][0], m11 = mat->m[1][1], m12 = mat->m[1][2], m20 = mat->m[2][0], m21 = mat->m[2][1], m22 = mat->m[2][2]; // calculate the adjoint mat->m[0][0] = (m11 * m22 - m21 * m12); mat->m[0][1] = -(m01 * m22 - m21 * m02); mat->m[0][2] = (m01 * m12 - m11 * m02); mat->m[1][0] = -(m10 * m22 - m20 * m12); mat->m[1][1] = (m00 * m22 - m20 * m02); mat->m[1][2] = -(m00 * m12 - m10 * m02); mat->m[2][0] = (m10 * m21 - m20 * m11); mat->m[2][1] = -(m00 * m21 - m20 * m01); mat->m[2][2] = (m00 * m11 - m10 * m01); // calculate the determinant (as inverse == 1/det * adjoint, // adjoint * m == identity * det, so this calculates the det) float det = m00 * mat->m[0][0] + m10 * mat->m[0][1] + m20 * mat->m[0][2]; det = 1.0f / det; for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) mat->m[i][j] *= det; } } void pl_matrix3x3_mul(struct pl_matrix3x3 *a, const struct pl_matrix3x3 *b) { float a00 = a->m[0][0], a01 = a->m[0][1], a02 = a->m[0][2], a10 = a->m[1][0], a11 = a->m[1][1], a12 = a->m[1][2], a20 = a->m[2][0], a21 = a->m[2][1], a22 = a->m[2][2]; for (int i = 0; i < 3; i++) { a->m[0][i] = a00 * b->m[0][i] + a01 * b->m[1][i] + a02 * b->m[2][i]; a->m[1][i] = a10 * b->m[0][i] + a11 * b->m[1][i] + a12 * b->m[2][i]; a->m[2][i] = a20 * b->m[0][i] + a21 * b->m[1][i] + a22 * b->m[2][i]; } } const struct pl_transform3x3 pl_transform3x3_identity = { .mat = {{ { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, }}, }; void pl_transform3x3_apply(const struct pl_transform3x3 *t, float vec[3]) { pl_matrix3x3_apply(&t->mat, vec); for (int i = 0; i < 3; i++) vec[i] += t->c[i]; } void pl_transform3x3_scale(struct pl_transform3x3 *t, float scale) { pl_matrix3x3_scale(&t->mat, scale); for (int i = 0; i < 3; i++) t->c[i] *= scale; } // based on DarkPlaces engine (relicensed from GPL to LGPL) void pl_transform3x3_invert(struct pl_transform3x3 *t) { pl_matrix3x3_invert(&t->mat); float m00 = t->mat.m[0][0], m01 = t->mat.m[0][1], m02 = t->mat.m[0][2], m10 = t->mat.m[1][0], m11 = t->mat.m[1][1], m12 = t->mat.m[1][2], m20 = t->mat.m[2][0], m21 = t->mat.m[2][1], m22 = t->mat.m[2][2]; // fix the constant coefficient // rgb = M * yuv + C // M^-1 * rgb = yuv + M^-1 * C // yuv = M^-1 * rgb - M^-1 * C // ^^^^^^^^^^ float c0 = t->c[0], c1 = t->c[1], c2 = t->c[2]; t->c[0] = -(m00 * c0 + m01 * c1 + m02 * c2); t->c[1] = -(m10 * c0 + m11 * c1 + m12 * c2); t->c[2] = -(m20 * c0 + m21 * c1 + m22 * c2); } const struct pl_matrix2x2 pl_matrix2x2_identity = {{ { 1, 0 }, { 0, 1 }, }}; void pl_matrix2x2_apply(const 
struct pl_matrix2x2 *mat, float vec[2]) { float x = vec[0], y = vec[1]; for (int i = 0; i < 2; i++) vec[i] = mat->m[i][0] * x + mat->m[i][1] * y; } const struct pl_transform2x2 pl_transform2x2_identity = { .mat = {{ { 1, 0 }, { 0, 1 }, }}, }; void pl_transform2x2_apply(const struct pl_transform2x2 *t, float vec[2]) { pl_matrix2x2_apply(&t->mat, vec); for (int i = 0; i < 2; i++) vec[i] += t->c[i]; } libplacebo-0.4.0/src/common.h000066400000000000000000000063671324021332500160460ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include #include #include "ta/talloc.h" #include "config.h" #include "pl_assert.h" // Include all of the symbols that should be public in a way that marks them // as being externally visible. (Otherwise, all symbols are hidden by default) #pragma GCC visibility push(default) #include "include/libplacebo/colorspace.h" #include "include/libplacebo/common.h" #include "include/libplacebo/context.h" #include "include/libplacebo/dispatch.h" #include "include/libplacebo/dither.h" #include "include/libplacebo/filters.h" #include "include/libplacebo/gpu.h" #include "include/libplacebo/renderer.h" #include "include/libplacebo/shaders.h" #include "include/libplacebo/shaders/colorspace.h" #include "include/libplacebo/shaders/sampling.h" #include "include/libplacebo/swapchain.h" #include "include/libplacebo/utils/upload.h" #if PL_HAVE_VULKAN #include "include/libplacebo/vulkan.h" #endif #pragma GCC visibility pop // Align up to the nearest multiple of an arbitrary alignment, which may also // be 0 to signal no alignment requirements. #define PL_ALIGN(x, align) ((align) ? ((x) + (align) - 1) / (align) * (align) : (x)) // This is faster but must only be called on positive powers of two. #define PL_ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) // Returns the log base 2 of an unsigned long long #define PL_LOG2(x) ((unsigned) (8*sizeof (unsigned long long) - __builtin_clzll((x)) - 1)) // Returns the size of a static array with known size. #define PL_ARRAY_SIZE(s) (sizeof(s) / sizeof((s)[0])) // Swaps two variables #define PL_SWAP(a, b) \ do { \ __typeof__ (a) tmp = (a); \ (a) = (b); \ (b) = tmp; \ } while (0) // Helper functions for transposing a matrix in-place. #define PL_TRANSPOSE_DIM(d, m) \ pl_transpose((d), (float[(d)*(d)]){0}, (const float *)(m)) #define PL_TRANSPOSE_2X2(m) PL_TRANSPOSE_DIM(2, m) #define PL_TRANSPOSE_3X3(m) PL_TRANSPOSE_DIM(3, m) #define PL_TRANSPOSE_4X4(m) PL_TRANSPOSE_DIM(4, m) static inline float *pl_transpose(int dim, float *out, const float *in) { for (int i = 0; i < dim; i++) { for (int j = 0; j < dim; j++) out[i * dim + j] = in[j * dim + i]; } return out; } // Helper functions for some common numeric operations (careful: double-eval) #define PL_MAX(x, y) ((x) > (y) ? (x) : (y)) #define PL_MIN(x, y) ((x) < (y) ? (x) : (y)) #define PL_CMP(a, b) ((a) < (b) ? -1 : (a) > (b) ? 
1 : 0) #define PL_DEF(x, d) ((x) ? (x) : (d)) libplacebo-0.4.0/src/config.h.in000066400000000000000000000026241324021332500164200ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_CONFIG_H_ #define LIBPLACEBO_CONFIG_H_ // Increased any time the library changes in a fundamental/major way. #define PL_MAJOR_VER @majorver@ // Increased any time the API changes. (Note: Does not reset when PL_MAJOR_VER // is increased) #define PL_API_VER @apiver@ // Increased any time a fix is made to a given API version. #define PL_FIX_VER @fixver@ // Friendly name (`git describe`) for the overall version of the library #define PL_VERSION @version@ // Feature tests. These aren't described in further detail, but may be useful // for programmers wanting to programmatically check for feature support // in their compiled libshaderc versions. @extra_defs@ #endif // LIBPLACEBO_CONTEXT_H_ libplacebo-0.4.0/src/context.c000066400000000000000000000106301324021332500162210ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include #include "common.h" #include "context.h" static pthread_mutex_t pl_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; static int pl_ctx_refcount; static void global_init(void) { #ifndef NDEBUG const char *enable_leak = getenv("LIBPLACEBO_LEAK_REPORT"); if (enable_leak && strcmp(enable_leak, "1") == 0) talloc_enable_leak_report(); #endif } static void global_uninit(void) { #ifndef NDEBUG talloc_print_leak_report(); #endif } struct pl_context *pl_context_create(int api_ver, const struct pl_context_params *params) { if (api_ver != PL_API_VER) { fprintf(stderr, "*************************************************************\n" "libplacebo: ABI mismatch detected!\n\n" "This is usually indicative of a linking mismatch, and will\n" "result in serious issues including stack corruption, random\n" "crashes and arbitrary code execution. Aborting as a safety\n" "precaution. 
Fix your system!\n"); abort(); } // Do global initialization only when refcount is 0 pthread_mutex_lock(&pl_ctx_mutex); if (pl_ctx_refcount++ == 0) global_init(); pthread_mutex_unlock(&pl_ctx_mutex); struct pl_context *ctx = talloc_zero(NULL, struct pl_context); ctx->params = *PL_DEF(params, &pl_context_default_params); return ctx; } const struct pl_context_params pl_context_default_params = {0}; void pl_context_destroy(struct pl_context **ctx) { TA_FREEP(ctx); // Do global uninitialization only when refcount reaches 0 pthread_mutex_lock(&pl_ctx_mutex); if (--pl_ctx_refcount == 0) global_uninit(); pthread_mutex_unlock(&pl_ctx_mutex); } static FILE *default_stream(void *stream, enum pl_log_level level) { return PL_DEF(stream, level <= PL_LOG_WARN ? stderr : stdout); } void pl_log_simple(void *stream, enum pl_log_level level, const char *msg) { static const char *prefix[] = { [PL_LOG_FATAL] = "fatal", [PL_LOG_ERR] = "error", [PL_LOG_WARN] = "warn", [PL_LOG_INFO] = "info", [PL_LOG_DEBUG] = "debug", [PL_LOG_TRACE] = "trace", }; FILE *h = default_stream(stream, level); fprintf(h, "%5s: %s\n", prefix[level], msg); } void pl_log_color(void *stream, enum pl_log_level level, const char *msg) { static const char *color[] = { [PL_LOG_FATAL] = "31;1", // bright red [PL_LOG_ERR] = "31", // red [PL_LOG_WARN] = "33", // yellow/orange [PL_LOG_INFO] = "32", // green [PL_LOG_DEBUG] = "34", // blue [PL_LOG_TRACE] = "30;1", // bright black }; FILE *h = default_stream(stream, level); fprintf(h, "\033[%sm%s\033[0m\n", color[level], msg); } void pl_msg(struct pl_context *ctx, enum pl_log_level lev, const char *fmt, ...) { va_list va; va_start(va, fmt); pl_msg_va(ctx, lev, fmt, va); va_end(va); } void pl_msg_va(struct pl_context *ctx, enum pl_log_level lev, const char *fmt, va_list va) { if (!pl_msg_test(ctx, lev)) return; ctx->logbuffer.len = 0; bstr_xappend_vasprintf(ctx, &ctx->logbuffer, fmt, va); ctx->params.log_cb(ctx->params.log_priv, lev, ctx->logbuffer.start); } void pl_msg_source(struct pl_context *ctx, enum pl_log_level lev, const char *src) { if (!pl_msg_test(ctx, lev) || !src) return; int line = 1; while (*src) { const char *end = strchr(src, '\n'); const char *next = end + 1; if (!end) next = end = src + strlen(src); pl_msg(ctx, lev, "[%3d] %.*s", line, (int)(end - src), src); line++; src = next; } } libplacebo-0.4.0/src/context.h000066400000000000000000000044361324021332500162350ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include "bstr/bstr.h" #include "common.h" struct pl_context { struct pl_context_params params; struct bstr logbuffer; }; // Logging-related functions static inline bool pl_msg_test(struct pl_context *ctx, enum pl_log_level lev) { return ctx->params.log_cb && ctx->params.log_level >= lev; } void pl_msg(struct pl_context *ctx, enum pl_log_level lev, const char *fmt, ...) 
PRINTF_ATTRIBUTE(3, 4); void pl_msg_va(struct pl_context *ctx, enum pl_log_level lev, const char *fmt, va_list va); // Convenience macros #define pl_fatal(log, ...) pl_msg(ctx, PL_LOG_FATAL, __VA_ARGS__) #define pl_err(log, ...) pl_msg(ctx, PL_LOG_ERR, __VA_ARGS__) #define pl_warn(log, ...) pl_msg(ctx, PL_LOG_WARN, __VA_ARGS__) #define pl_info(log, ...) pl_msg(ctx, PL_LOG_INFO, __VA_ARGS__) #define pl_debug(log, ...) pl_msg(ctx, PL_LOG_DEBUG, __VA_ARGS__) #define pl_trace(log, ...) pl_msg(ctx, PL_LOG_TRACE, __VA_ARGS__) #define PL_MSG(obj, lev, ...) pl_msg((obj)->ctx, lev, __VA_ARGS__) #define PL_FATAL(obj, ...) PL_MSG(obj, PL_LOG_FATAL, __VA_ARGS__) #define PL_ERR(obj, ...) PL_MSG(obj, PL_LOG_ERR, __VA_ARGS__) #define PL_WARN(obj, ...) PL_MSG(obj, PL_LOG_WARN, __VA_ARGS__) #define PL_INFO(obj, ...) PL_MSG(obj, PL_LOG_INFO, __VA_ARGS__) #define PL_DEBUG(obj, ...) PL_MSG(obj, PL_LOG_DEBUG, __VA_ARGS__) #define PL_TRACE(obj, ...) PL_MSG(obj, PL_LOG_TRACE, __VA_ARGS__) // Log something with line numbers included void pl_msg_source(struct pl_context *ctx, enum pl_log_level lev, const char *src); libplacebo-0.4.0/src/dispatch.c000066400000000000000000000756341324021332500163530ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include "context.h" #include "shaders.h" #include "gpu.h" enum { TMP_PRELUDE, // GLSL version, global definitions, etc. TMP_MAIN, // main GLSL shader body TMP_VERT_HEAD, // vertex shader inputs/outputs TMP_VERT_BODY, // vertex shader body TMP_COUNT, }; struct pl_dispatch { struct pl_context *ctx; const struct pl_gpu *gpu; uint8_t current_ident; uint8_t current_index; // pool of pl_shaders, in order to avoid frequent re-allocations struct pl_shader **shaders; int num_shaders; // cache of compiled passes struct pass **passes; int num_passes; // temporary buffers to help avoid re_allocations during pass creation struct bstr tmp[TMP_COUNT]; }; enum pass_var_type { PASS_VAR_GLOBAL, // regular/global uniforms (PL_GPU_CAP_INPUT_VARIABLES) PASS_VAR_UBO, // uniform buffers PASS_VAR_PUSHC // push constants }; // Cached metadata about a variable's effective placement / update method struct pass_var { int index; // for pl_var_update enum pass_var_type type; struct pl_var_layout layout; void *cached_data; }; struct pass { uint64_t signature; // as returned by pl_shader_signature const struct pl_pass *pass; bool failed; // contains cached data and update metadata, same order as pl_shader struct pass_var *vars; // for uniform buffer updates const struct pl_buf *ubo; struct pl_desc ubo_desc; // temporary // Cached pl_pass_run_params. 
This will also contain mutable allocations // for the push constants, descriptor bindings (including the binding for // the UBO pre-filled), vertex array and variable updates struct pl_pass_run_params run_params; }; static void pass_destroy(struct pl_dispatch *dp, struct pass *pass) { if (!pass) return; pl_buf_destroy(dp->gpu, &pass->ubo); pl_pass_destroy(dp->gpu, &pass->pass); talloc_free(pass); } struct pl_dispatch *pl_dispatch_create(struct pl_context *ctx, const struct pl_gpu *gpu) { pl_assert(ctx); struct pl_dispatch *dp = talloc_zero(ctx, struct pl_dispatch); dp->ctx = ctx; dp->gpu = gpu; return dp; } void pl_dispatch_destroy(struct pl_dispatch **ptr) { struct pl_dispatch *dp = *ptr; if (!dp) return; for (int i = 0; i < dp->num_passes; i++) pass_destroy(dp, dp->passes[i]); for (int i = 0; i < dp->num_shaders; i++) pl_shader_free(&dp->shaders[i]); talloc_free(dp); *ptr = NULL; } struct pl_shader *pl_dispatch_begin(struct pl_dispatch *dp) { uint8_t ident = dp->current_ident++; struct pl_shader *sh; if (TARRAY_POP(dp->shaders, dp->num_shaders, &sh)) { pl_shader_reset(sh, ident, dp->current_index); return sh; } return pl_shader_alloc(dp->ctx, dp->gpu, ident, dp->current_index); } void pl_dispatch_reset_frame(struct pl_dispatch *dp) { dp->current_ident = 0; dp->current_index++; } static bool add_pass_var(struct pl_dispatch *dp, void *tmp, struct pass *pass, struct pl_pass_params *params, const struct pl_shader_var *sv, struct pass_var *pv) { const struct pl_gpu *gpu = dp->gpu; // Try not to use push constants for "large" values like matrices, since // this is likely to exceed the VGPR/pushc size budgets bool try_pushc = (sv->var.dim_m == 1 && sv->var.dim_a == 1) || sv->dynamic; if (try_pushc && gpu->glsl.vulkan && gpu->limits.max_pushc_size) { pv->layout = pl_push_constant_layout(gpu, params->push_constants_size, &sv->var); size_t new_size = pv->layout.offset + pv->layout.size; if (new_size <= gpu->limits.max_pushc_size) { params->push_constants_size = new_size; pv->type = PASS_VAR_PUSHC; return true; } } // Attempt using uniform buffer next. The GLSL version 440 check is due // to explicit offsets on UBO entries. In theory we could leave away // the offsets and support UBOs for older GL as well, but this is a nice // safety net for driver bugs (and also rules out potentially buggy drivers) // Also avoid UBOs for highly dynamic stuff since that requires synchronizing // the UBO writes every frame bool try_ubo = !(gpu->caps & PL_GPU_CAP_INPUT_VARIABLES) || !sv->dynamic; if (try_ubo && gpu->glsl.version >= 440 && gpu->limits.max_ubo_size) { if (pl_buf_desc_append(tmp, gpu, &pass->ubo_desc, &pv->layout, sv->var)) { pv->type = PASS_VAR_UBO; return true; } } // Otherwise, use global uniforms if (gpu->caps & PL_GPU_CAP_INPUT_VARIABLES) { pv->type = PASS_VAR_GLOBAL; pv->index = params->num_variables; pv->layout = pl_var_host_layout(0, &sv->var); TARRAY_APPEND(tmp, params->variables, params->num_variables, sv->var); return true; } // Ran out of variable binding methods. The most likely scenario in which // this can happen is if we're using a GPU that does not support global // input vars and we've exhausted the UBO size limits. PL_ERR(dp, "Unable to add input variable '%s': possibly exhausted " "UBO size limits?", sv->var.name); return false; } #define ADD(x, ...) 
bstr_xappend_asprintf_c(dp, (x), __VA_ARGS__) #define ADD_BSTR(x, s) bstr_xappend(dp, (x), (s)) static void add_var(struct pl_dispatch *dp, struct bstr *body, const struct pl_var *var) { ADD(body, "%s %s", pl_var_glsl_type_name(*var), var->name); if (var->dim_a > 1) { ADD(body, "[%d];\n", var->dim_a); } else { ADD(body, ";\n"); } } static void add_buffer_vars(struct pl_dispatch *dp, struct bstr *body, const struct pl_buffer_var *vars, int num) { ADD(body, "{\n"); for (int i = 0; i < num; i++) { ADD(body, " layout(offset=%zu) ", vars[i].layout.offset); add_var(dp, body, &vars[i].var); } ADD(body, "};\n"); } static ident_t sh_var_from_va(struct pl_shader *sh, const char *name, const struct pl_vertex_attrib *va, const void *data) { return sh_var(sh, (struct pl_shader_var) { .var = pl_var_from_fmt(va->fmt, name), .data = data, }); } static void generate_shaders(struct pl_dispatch *dp, struct pass *pass, struct pl_pass_params *params, struct pl_shader *sh, ident_t vert_pos) { const struct pl_gpu *gpu = dp->gpu; const struct pl_shader_res *res = pl_shader_finalize(sh); struct bstr *pre = &dp->tmp[TMP_PRELUDE]; ADD(pre, "#version %d%s\n", gpu->glsl.version, gpu->glsl.gles ? " es" : ""); if (params->type == PL_PASS_COMPUTE) ADD(pre, "#extension GL_ARB_compute_shader : enable\n"); if (gpu->glsl.gles) { ADD(pre, "precision mediump float;\n"); ADD(pre, "precision mediump sampler2D;\n"); if (gpu->limits.max_tex_1d_dim) ADD(pre, "precision mediump sampler1D;\n"); if (gpu->limits.max_tex_3d_dim) ADD(pre, "precision mediump sampler3D;\n"); } char *vert_in = gpu->glsl.version >= 130 ? "in" : "attribute"; char *vert_out = gpu->glsl.version >= 130 ? "out" : "varying"; char *frag_in = gpu->glsl.version >= 130 ? "in" : "varying"; struct bstr *glsl = &dp->tmp[TMP_MAIN]; ADD_BSTR(glsl, *pre); const char *out_color = "gl_FragColor"; switch(params->type) { case PL_PASS_RASTER: { pl_assert(vert_pos); struct bstr *vert_head = &dp->tmp[TMP_VERT_HEAD]; struct bstr *vert_body = &dp->tmp[TMP_VERT_BODY]; // Set up a trivial vertex shader ADD_BSTR(vert_head, *pre); ADD(vert_body, "void main() {\n"); for (int i = 0; i < res->num_vertex_attribs; i++) { const struct pl_vertex_attrib *va = ¶ms->vertex_attribs[i]; const struct pl_shader_va *sva = &res->vertex_attribs[i]; const char *type = va->fmt->glsl_type; // Use the pl_shader_va for the name in the fragment shader since // the pl_vertex_attrib is already mangled for the vertex shader const char *name = sva->attr.name; char loc[32]; snprintf(loc, sizeof(loc), "layout(location=%d) ", va->location); ADD(vert_head, "%s%s %s %s;\n", loc, vert_in, type, va->name); if (strcmp(name, vert_pos) == 0) { pl_assert(va->fmt->num_components == 2); ADD(vert_body, "gl_Position = vec4(%s, 0.0, 1.0);\n", va->name); } else { // Everything else is just blindly passed through ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, type, name); ADD(vert_body, "%s = %s;\n", name, va->name); ADD(glsl, "%s%s %s %s;\n", loc, frag_in, type, name); } } ADD(vert_body, "}"); ADD_BSTR(vert_head, *vert_body); params->vertex_shader = vert_head->start; // GLSL 130+ doesn't use the magic gl_FragColor if (gpu->glsl.version >= 130) { out_color = "out_color"; ADD(glsl, "layout(location=0) out vec4 %s;\n", out_color); } break; } case PL_PASS_COMPUTE: ADD(glsl, "layout (local_size_x = %d, local_size_y = %d) in;\n", res->compute_group_size[0], res->compute_group_size[1]); break; default: abort(); } // Add all of the push constants as their own element if (params->push_constants_size) { ADD(glsl, "layout(std430, 
push_constant) uniform PushC {\n"); for (int i = 0; i < res->num_variables; i++) { struct pl_var *var = &res->variables[i].var; struct pass_var *pv = &pass->vars[i]; if (pv->type != PASS_VAR_PUSHC) continue; ADD(glsl, "/*offset=%zu*/ ", pv->layout.offset); add_var(dp, glsl, var); } ADD(glsl, "};\n"); } // Add all of the required descriptors for (int i = 0; i < res->num_descriptors; i++) { const struct pl_shader_desc *sd = &res->descriptors[i]; const struct pl_desc *desc = ¶ms->descriptors[i]; switch (desc->type) { case PL_DESC_SAMPLED_TEX: { static const char *types[] = { [1] = "sampler1D", [2] = "sampler2D", [3] = "sampler3D", }; // Vulkan requires explicit bindings; GL always sets the // bindings manually to avoid relying on the user doing so if (gpu->glsl.vulkan) ADD(glsl, "layout(binding=%d) ", desc->binding); const struct pl_tex *tex = sd->object; int dims = pl_tex_params_dimension(tex->params); ADD(glsl, "uniform %s %s;\n", types[dims], desc->name); break; } case PL_DESC_STORAGE_IMG: { static const char *types[] = { [1] = "image1D", [2] = "image2D", [3] = "image3D", }; // For better compatibility, we have to explicitly label the // type of data we will be reading/writing to this image. const struct pl_tex *tex = sd->object; const char *format = tex->params.format->glsl_format; const char *access = pl_desc_access_glsl_name(desc->access); int dims = pl_tex_params_dimension(tex->params); pl_assert(format); if (gpu->glsl.vulkan) { ADD(glsl, "layout(binding=%d, %s) ", desc->binding, format); } else { ADD(glsl, "layout(%s) ", format); } ADD(glsl, "%s uniform %s %s;\n", access, types[dims], desc->name); break; } case PL_DESC_BUF_UNIFORM: ADD(glsl, "layout(std140, binding=%d) uniform %s ", desc->binding, desc->name); add_buffer_vars(dp, glsl, desc->buffer_vars, desc->num_buffer_vars); break; case PL_DESC_BUF_STORAGE: ADD(glsl, "layout(std430, binding=%d) %s buffer %s ", desc->binding, pl_desc_access_glsl_name(desc->access), desc->name); add_buffer_vars(dp, glsl, desc->buffer_vars, desc->num_buffer_vars); break; case PL_DESC_BUF_TEXEL_UNIFORM: if (gpu->glsl.vulkan) ADD(glsl, "layout(binding=%d) ", desc->binding); ADD(glsl, "uniform samplerBuffer %s;\n", desc->name); break; case PL_DESC_BUF_TEXEL_STORAGE: { const struct pl_buf *buf = sd->object; const char *format = buf->params.format->glsl_format; const char *access = pl_desc_access_glsl_name(desc->access); if (gpu->glsl.vulkan) { ADD(glsl, "layout(binding=%d, %s) ", desc->binding, format); } else { ADD(glsl, "layout(%s) ", format); } ADD(glsl, "%s uniform imageBuffer %s;\n", access, desc->name); break; } default: abort(); } } // Add all of the remaining variables for (int i = 0; i < res->num_variables; i++) { const struct pl_var *var = &res->variables[i].var; const struct pass_var *pv = &pass->vars[i]; if (pv->type != PASS_VAR_GLOBAL) continue; ADD(glsl, "uniform "); add_var(dp, glsl, var); } // Set up the main shader body ADD(glsl, "%s", res->glsl); ADD(glsl, "void main() {\n"); pl_assert(res->input == PL_SHADER_SIG_NONE); switch (params->type) { case PL_PASS_RASTER: pl_assert(res->output == PL_SHADER_SIG_COLOR); ADD(glsl, "%s = %s();\n", out_color, res->name); break; case PL_PASS_COMPUTE: pl_assert(res->output == PL_SHADER_SIG_NONE); ADD(glsl, "%s();\n", res->name); break; default: abort(); } ADD(glsl, "}"); params->glsl_shader = glsl->start; } #undef ADD #undef ADD_BSTR static bool blend_equal(const struct pl_blend_params *a, const struct pl_blend_params *b) { if (!a && !b) return true; if (!a || !b) return false; return a->src_rgb == 
b->src_rgb && a->dst_rgb == b->dst_rgb && a->src_alpha == b->src_alpha && a->dst_alpha == b->dst_alpha; } static struct pass *find_pass(struct pl_dispatch *dp, struct pl_shader *sh, const struct pl_tex *target, ident_t vert_pos, const struct pl_blend_params *blend) { uint64_t sig = pl_shader_signature(sh); for (int i = 0; i < dp->num_passes; i++) { const struct pass *p = dp->passes[i]; if (p->signature == sig && blend_equal(p->pass->params.blend_params, blend)) return dp->passes[i]; } void *tmp = talloc_new(NULL); // for resources attached to `params` struct pass *pass = talloc_zero(dp, struct pass); pass->signature = sig; pass->failed = true; // will be set to false on success pass->ubo_desc = (struct pl_desc) { .name = "UBO", .type = PL_DESC_BUF_UNIFORM, }; struct pl_shader_res *res = &sh->res; struct pl_pass_run_params *rparams = &pass->run_params; struct pl_pass_params params = { .type = pl_shader_is_compute(sh) ? PL_PASS_COMPUTE : PL_PASS_RASTER, .num_descriptors = res->num_descriptors, .blend_params = blend, // set this for all pass types (for caching) }; if (params.type == PL_PASS_RASTER) { params.target_dummy = *target; // Fill in the vertex attributes array params.num_vertex_attribs = res->num_vertex_attribs; params.vertex_attribs = talloc_zero_array(tmp, struct pl_vertex_attrib, res->num_vertex_attribs); int va_loc = 0; for (int i = 0; i < res->num_vertex_attribs; i++) { struct pl_vertex_attrib *va = ¶ms.vertex_attribs[i]; *va = res->vertex_attribs[i].attr; // Mangle the name to make sure it doesn't conflict with the // fragment shader input va->name = talloc_asprintf(tmp, "vert%s", va->name); // Place the vertex attribute va->offset = params.vertex_stride; va->location = va_loc; params.vertex_stride += va->fmt->texel_size; // The number of vertex attribute locations consumed by a vertex // attribute is the number of vec4s it consumes, rounded up const size_t va_loc_size = sizeof(float[4]); va_loc += (va->fmt->texel_size + va_loc_size - 1) / va_loc_size; } // Generate the vertex array placeholder params.vertex_type = PL_PRIM_TRIANGLE_STRIP; rparams->vertex_count = 4; // single quad size_t vert_size = rparams->vertex_count * params.vertex_stride; rparams->vertex_data = talloc_zero_size(pass, vert_size); } // Place all the variables; these will dynamically end up in different // locations based on what the underlying GPU supports (UBOs, pushc, etc.) 
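    // (Placement order, as implemented by add_pass_var above: small
    // non-matrix/non-array or dynamic values are tried as Vulkan push
    // constants first, within the push constant size budget; non-dynamic
    // values — or all values when global input variables are unsupported —
    // are packed into the uniform buffer on GLSL >= 440; anything left over
    // becomes a global uniform if PL_GPU_CAP_INPUT_VARIABLES is available.)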
pass->vars = talloc_zero_array(pass, struct pass_var, res->num_variables); for (int i = 0; i < res->num_variables; i++) { if (!add_pass_var(dp, tmp, pass, ¶ms, &res->variables[i], &pass->vars[i])) goto error; } // Create and attach the UBO if necessary int ubo_index = -1; size_t ubo_size = pl_buf_desc_size(&pass->ubo_desc); if (ubo_size) { pass->ubo = pl_buf_create(dp->gpu, &(struct pl_buf_params) { .type = PL_BUF_UNIFORM, .size = ubo_size, .host_writable = true, }); if (!pass->ubo) { PL_ERR(dp, "Failed creating uniform buffer for dispatch"); goto error; } ubo_index = res->num_descriptors; sh_desc(sh, (struct pl_shader_desc) { .desc = pass->ubo_desc, .object = pass->ubo, }); } // Place and fill in the descriptors int num = res->num_descriptors; int binding[PL_DESC_TYPE_COUNT] = {0}; params.num_descriptors = num; params.descriptors = talloc_zero_array(tmp, struct pl_desc, num); rparams->desc_bindings = talloc_zero_array(pass, struct pl_desc_binding, num); for (int i = 0; i < num; i++) { struct pl_desc *desc = ¶ms.descriptors[i]; *desc = res->descriptors[i].desc; desc->binding = binding[pl_desc_namespace(dp->gpu, desc->type)]++; } // Pre-fill the desc_binding for the UBO if (pass->ubo) { pl_assert(ubo_index >= 0); rparams->desc_bindings[ubo_index].object = pass->ubo; } // Create the push constants region params.push_constants_size = PL_ALIGN2(params.push_constants_size, 4); rparams->push_constants = talloc_zero_size(pass, params.push_constants_size); // Finally, finalize the shaders and create the pass itself generate_shaders(dp, pass, ¶ms, sh, vert_pos); pass->pass = rparams->pass = pl_pass_create(dp->gpu, ¶ms); if (!pass->pass) { PL_ERR(dp, "Failed creating render pass for dispatch"); goto error; } pass->failed = false; error: pass->ubo_desc = (struct pl_desc) {0}; // contains temporary pointers talloc_free(tmp); TARRAY_APPEND(dp, dp->passes, dp->num_passes, pass); return pass; } static void update_pass_var(struct pl_dispatch *dp, struct pass *pass, const struct pl_shader_var *sv, struct pass_var *pv) { struct pl_var_layout host_layout = pl_var_host_layout(0, &sv->var); pl_assert(host_layout.size); // Use the cache to skip updates if possible if (pv->cached_data && !memcmp(sv->data, pv->cached_data, host_layout.size)) return; if (!pv->cached_data) pv->cached_data = talloc_size(pass, host_layout.size); memcpy(pv->cached_data, sv->data, host_layout.size); struct pl_pass_run_params *rparams = &pass->run_params; switch (pv->type) { case PASS_VAR_GLOBAL: { struct pl_var_update vu = { .index = pv->index, .data = sv->data, }; TARRAY_APPEND(pass, rparams->var_updates, rparams->num_var_updates, vu); break; } case PASS_VAR_UBO: { pl_assert(pass->ubo); uintptr_t src = (uintptr_t) sv->data; uintptr_t end = src + (ptrdiff_t) host_layout.size; size_t dst = pv->layout.offset; while (src < end) { pl_buf_write(dp->gpu, pass->ubo, dst, (void *) src, host_layout.stride); src += host_layout.stride; dst += pv->layout.stride; } break; } case PASS_VAR_PUSHC: pl_assert(rparams->push_constants); memcpy_layout(rparams->push_constants, pv->layout, sv->data, host_layout); break; }; } static void translate_compute_shader(struct pl_dispatch *dp, struct pl_shader *sh, const struct pl_tex *target, const struct pl_rect2d *rc, const struct pl_blend_params *blend) { // Simulate vertex attributes using global definitions int width = abs(pl_rect_w(*rc)), height = abs(pl_rect_h(*rc)); ident_t out_scale = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("out_scale"), .data = &(float[2]){ 1.0 / width, 1.0 / height }, 
.dynamic = true, }); GLSLP("#define frag_pos(id) (vec2(id) + vec2(0.5)) \n" "#define frag_map(id) (%s * frag_pos(id)) \n" "#define gl_FragCoord vec4(frag_pos(gl_GlobalInvocationID), 0.0, 1.0) \n", out_scale); for (int n = 0; n < sh->res.num_vertex_attribs; n++) { const struct pl_shader_va *sva = &sh->res.vertex_attribs[n]; ident_t points[4]; for (int i = 0; i < PL_ARRAY_SIZE(points); i++) { char name[4]; snprintf(name, sizeof(name), "p%d", i); points[i] = sh_var_from_va(sh, name, &sva->attr, sva->data[i]); } GLSLP("#define %s_map(id) " "(mix(mix(%s, %s, frag_map(id).x), " " mix(%s, %s, frag_map(id).x), " "frag_map(id).y))\n" "#define %s (%s_map(gl_GlobalInvocationID))\n", sva->attr.name, points[0], points[1], points[2], points[3], sva->attr.name, sva->attr.name); } // Simulate a framebuffer using storage images pl_assert(target->params.storable); pl_assert(sh->res.output == PL_SHADER_SIG_COLOR); ident_t fbo = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "out_image", .type = PL_DESC_STORAGE_IMG, .access = blend ? PL_DESC_ACCESS_READWRITE : PL_DESC_ACCESS_WRITEONLY, }, .object = target, }); ident_t base = sh_var(sh, (struct pl_shader_var) { .data = &(int[2]){ rc->x0, rc->y0 }, .dynamic = true, .var = { .name = "base", .type = PL_VAR_SINT, .dim_v = 2, .dim_m = 1, .dim_a = 1, }, }); int dx = rc->x0 > rc->x1 ? -1 : 1, dy = rc->y0 > rc->y1 ? -1 : 1; GLSL("ivec2 dir = ivec2(%d, %d);\n", dx, dy); // hard-code, not worth var GLSL("ivec2 pos = %s + dir * ivec2(gl_GlobalInvocationID);\n", base); GLSL("vec2 fpos = %s * vec2(gl_GlobalInvocationID);\n", out_scale); GLSL("if (max(fpos.x, fpos.y) < 1.0) {\n"); if (blend) { GLSL("vec4 orig = imageLoad(%s, pos);\n", fbo); static const char *modes[] = { [PL_BLEND_ZERO] = "0.0", [PL_BLEND_ONE] = "1.0", [PL_BLEND_SRC_ALPHA] = "color.a", [PL_BLEND_ONE_MINUS_SRC_ALPHA] = "(1.0 - color.a)", }; GLSL("color = vec4(color.rgb * vec3(%s), color.a * %s) \n" " + vec4(orig.rgb * vec3(%s), orig.a * %s);\n", modes[blend->src_rgb], modes[blend->src_alpha], modes[blend->dst_rgb], modes[blend->dst_alpha]); } GLSL("imageStore(%s, pos, color);\n", fbo); GLSL("}\n"); sh->res.output = PL_SHADER_SIG_NONE; } bool pl_dispatch_finish(struct pl_dispatch *dp, struct pl_shader **psh, const struct pl_tex *target, const struct pl_rect2d *rc, const struct pl_blend_params *blend) { struct pl_shader *sh = *psh; const struct pl_shader_res *res = &sh->res; bool ret = false; if (!sh->mutable) { PL_ERR(dp, "Trying to dispatch non-mutable shader?"); goto error; } if (res->input != PL_SHADER_SIG_NONE || res->output != PL_SHADER_SIG_COLOR) { PL_ERR(dp, "Trying to dispatch shader with incompatible signature!"); goto error; } const struct pl_tex_params *tpars = &target->params; if (pl_tex_params_dimension(*tpars) != 2 || !tpars->renderable) { PL_ERR(dp, "Trying to dispatch using a shader using an invalid target " "texture. The target must be a renderable 2D texture."); goto error; } struct pl_rect2d full = {0, 0, tpars->w, tpars->h}; rc = PL_DEF(rc, &full); int w, h, tw = abs(pl_rect_w(*rc)), th = abs(pl_rect_h(*rc)); if (pl_shader_output_size(sh, &w, &h) && (w != tw || h != th)) { PL_ERR(dp, "Trying to dispatch a shader with explicit output size " "requirements %dx%d using a target rect of size %dx%d.", w, h, tw, th); goto error; } ident_t vert_pos = NULL; if (pl_shader_is_compute(sh)) { // Translate the compute shader to simulate vertices etc. 
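        // (This emulates the raster path: each vertex attribute is replaced
        // by a bilinear mix of its four corner values keyed off
        // gl_GlobalInvocationID, and the framebuffer is emulated with a
        // storage image write, including a manual blend when one is
        // requested. Note that this requires the target texture to be
        // storable.)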
translate_compute_shader(dp, sh, target, rc, blend); } else { // Add the vertex information encoding the position vert_pos = sh_attr_vec2(sh, "position", &(const struct pl_rect2df) { .x0 = 2.0 * rc->x0 / tpars->w - 1.0, .y0 = 2.0 * rc->y0 / tpars->h - 1.0, .x1 = 2.0 * rc->x1 / tpars->w - 1.0, .y1 = 2.0 * rc->y1 / tpars->h - 1.0, }); } struct pass *pass = find_pass(dp, sh, target, vert_pos, blend); // Silently return on failed passes if (pass->failed) goto error; struct pl_pass_run_params *rparams = &pass->run_params; // Update the descriptor bindings for (int i = 0; i < sh->res.num_descriptors; i++) rparams->desc_bindings[i].object = sh->res.descriptors[i].object; // Update all of the variables (if needed) rparams->num_var_updates = 0; for (int i = 0; i < res->num_variables; i++) update_pass_var(dp, pass, &res->variables[i], &pass->vars[i]); // Update the vertex data if (rparams->vertex_data) { uintptr_t vert_base = (uintptr_t) rparams->vertex_data; size_t stride = rparams->pass->params.vertex_stride; for (int i = 0; i < res->num_vertex_attribs; i++) { struct pl_shader_va *sva = &res->vertex_attribs[i]; struct pl_vertex_attrib *va = &rparams->pass->params.vertex_attribs[i]; size_t size = sva->attr.fmt->texel_size; uintptr_t va_base = vert_base + va->offset; // use placed offset for (int n = 0; n < 4; n++) memcpy((void *) (va_base + n * stride), sva->data[n], size); } } // For compute shaders: also update the dispatch dimensions if (pl_shader_is_compute(sh)) { // Round up to make sure we don-t leave off a part of the target int width = abs(pl_rect_w(*rc)), height = abs(pl_rect_h(*rc)), block_w = sh->res.compute_group_size[0], block_h = sh->res.compute_group_size[1], num_x = (width + block_w - 1) / block_w, num_y = (height + block_h - 1) / block_h; rparams->compute_groups[0] = num_x; rparams->compute_groups[1] = num_y; rparams->compute_groups[2] = 1; } else { // Update the scissors for performance rparams->scissors = *rc; pl_rect2d_normalize(&rparams->scissors); } // Dispatch the actual shader rparams->target = target; pl_pass_run(dp->gpu, &pass->run_params); ret = true; error: // Reset the temporary buffers which we use to build the shader for (int i = 0; i < PL_ARRAY_SIZE(dp->tmp); i++) dp->tmp[i].len = 0; pl_dispatch_abort(dp, psh); return ret; } bool pl_dispatch_compute(struct pl_dispatch *dp, struct pl_shader **psh, int dispatch_size[3]) { struct pl_shader *sh = *psh; const struct pl_shader_res *res = &sh->res; bool ret = false; if (!sh->mutable) { PL_ERR(dp, "Trying to dispatch non-mutable shader?"); goto error; } if (res->input != PL_SHADER_SIG_NONE || res->output != PL_SHADER_SIG_NONE) { PL_ERR(dp, "Trying to dispatch shader with incompatible signature!"); goto error; } if (!pl_shader_is_compute(sh)) { PL_ERR(dp, "Trying to dispatch a non-compute shader using " "`pl_dispatch_compute`!"); goto error; } if (sh->res.num_vertex_attribs) { PL_ERR(dp, "Trying to dispatch a targetless compute shader that uses " "vertex attributes!"); goto error; } struct pass *pass = find_pass(dp, sh, NULL, NULL, NULL); // Silently return on failed passes if (pass->failed) goto error; struct pl_pass_run_params *rparams = &pass->run_params; // Update the descriptor bindings for (int i = 0; i < sh->res.num_descriptors; i++) rparams->desc_bindings[i].object = sh->res.descriptors[i].object; // Update all of the variables (if needed) rparams->num_var_updates = 0; for (int i = 0; i < res->num_variables; i++) update_pass_var(dp, pass, &res->variables[i], &pass->vars[i]); // Update the dispatch size for (int i = 0; 
i < 3; i++) rparams->compute_groups[i] = dispatch_size[i]; // Dispatch the actual shader pl_pass_run(dp->gpu, &pass->run_params); ret = true; error: // Reset the temporary buffers which we use to build the shader for (int i = 0; i < PL_ARRAY_SIZE(dp->tmp); i++) dp->tmp[i].len = 0; pl_dispatch_abort(dp, psh); return ret; } void pl_dispatch_abort(struct pl_dispatch *dp, struct pl_shader **psh) { struct pl_shader *sh = *psh; if (!sh) return; // Re-add the shader to the internal pool of shaders TARRAY_APPEND(dp, dp->shaders, dp->num_shaders, sh); *psh = NULL; } libplacebo-0.4.0/src/dither.c000066400000000000000000000124431324021332500160200ustar00rootroot00000000000000/* * Generate a noise texture for dithering images. * Copyright © 2013 Wessel Dankers * * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . * * The original code is taken from mpv, under the same license. */ #include #include #include #include #include #include #include #include "common.h" void pl_generate_bayer_matrix(float *data, int size) { pl_assert(size >= 0); // Start with a single entry of 0 data[0] = 0; for (int sz = 1; sz < size; sz *= 2) { // Make three copies of the current, appropriately shifted and scaled for (int y = 0; y < sz; y ++) { for (int x = 0; x < sz; x++) { int offsets[] = {0, sz * size + sz, sz, sz * size}; int pos = y * size + x; for (int i = 1; i < 4; i++) data[pos + offsets[i]] = data[pos] + i / (4.0 * sz * sz); } } } } #define MAX_SIZEB 8 #define MAX_SIZE (1 << MAX_SIZEB) #define MAX_SIZE2 (MAX_SIZE * MAX_SIZE) typedef uint_fast32_t index_t; #define WRAP_SIZE2(k, x) ((index_t)((index_t)(x) & ((k)->size2 - 1))) #define XY(k, x, y) ((index_t)(((x) | ((y) << (k)->sizeb)))) struct ctx { unsigned int sizeb, size, size2; unsigned int gauss_radius; unsigned int gauss_middle; uint64_t gauss[MAX_SIZE2]; index_t randomat[MAX_SIZE2]; bool calcmat[MAX_SIZE2]; uint64_t gaussmat[MAX_SIZE2]; index_t unimat[MAX_SIZE2]; }; static void makegauss(struct ctx *k, unsigned int sizeb) { pl_assert(sizeb >= 1 && sizeb <= MAX_SIZEB); k->sizeb = sizeb; k->size = 1 << k->sizeb; k->size2 = k->size * k->size; k->gauss_radius = k->size / 2 - 1; k->gauss_middle = XY(k, k->gauss_radius, k->gauss_radius); unsigned int gauss_size = k->gauss_radius * 2 + 1; unsigned int gauss_size2 = gauss_size * gauss_size; for (index_t c = 0; c < k->size2; c++) k->gauss[c] = 0; double sigma = -log(1.5 / UINT64_MAX * gauss_size2) / k->gauss_radius; for (index_t gy = 0; gy <= k->gauss_radius; gy++) { for (index_t gx = 0; gx <= gy; gx++) { int cx = (int)gx - k->gauss_radius; int cy = (int)gy - k->gauss_radius; int sq = cx * cx + cy * cy; double e = exp(-sqrt(sq) * sigma); uint64_t v = e / gauss_size2 * UINT64_MAX; k->gauss[XY(k, gx, gy)] = k->gauss[XY(k, gy, gx)] = k->gauss[XY(k, gx, gauss_size - 1 - gy)] = k->gauss[XY(k, gy, gauss_size - 1 - gx)] = k->gauss[XY(k, gauss_size - 1 - gx, gy)] = k->gauss[XY(k, gauss_size - 1 - gy, gx)] = k->gauss[XY(k, gauss_size - 1 
- gx, gauss_size - 1 - gy)] = k->gauss[XY(k, gauss_size - 1 - gy, gauss_size - 1 - gx)] = v; } } uint64_t total = 0; for (index_t c = 0; c < k->size2; c++) { uint64_t oldtotal = total; total += k->gauss[c]; assert(total >= oldtotal); } } static void setbit(struct ctx *k, index_t c) { if (k->calcmat[c]) return; k->calcmat[c] = true; uint64_t *m = k->gaussmat; uint64_t *me = k->gaussmat + k->size2; uint64_t *g = k->gauss + WRAP_SIZE2(k, k->gauss_middle + k->size2 - c); uint64_t *ge = k->gauss + k->size2; while (g < ge) *m++ += *g++; g = k->gauss; while (m < me) *m++ += *g++; } static index_t getmin(struct ctx *k) { uint64_t min = UINT64_MAX; index_t resnum = 0; unsigned int size2 = k->size2; for (index_t c = 0; c < size2; c++) { if (k->calcmat[c]) continue; uint64_t total = k->gaussmat[c]; if (total <= min) { if (total != min) { min = total; resnum = 0; } k->randomat[resnum++] = c; } } if (resnum == 1) return k->randomat[0]; if (resnum == size2) return size2 / 2; return k->randomat[rand() % resnum]; } static void makeuniform(struct ctx *k) { unsigned int size2 = k->size2; for (index_t c = 0; c < size2; c++) { index_t r = getmin(k); setbit(k, r); k->unimat[r] = c; } } void pl_generate_blue_noise(float *data, int size) { pl_assert(size > 0); int shift = PL_LOG2(size); pl_assert((1 << shift) == size); struct ctx *k = talloc_zero(NULL, struct ctx); makegauss(k, shift); makeuniform(k); float invscale = k->size2; for(index_t y = 0; y < k->size; y++) { for(index_t x = 0; x < k->size; x++) data[x + y * k->size] = k->unimat[XY(k, x, y)] / invscale; } talloc_free(k); } libplacebo-0.4.0/src/filters.c000066400000000000000000000466771324021332500162310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ /* * Some of the filter code originally derives (via mpv) from Glumpy: * # Copyright (c) 2009-2016 Nicolas P. Rougier. All rights reserved. * # Distributed under the (new) BSD License. * (https://github.com/glumpy/glumpy/blob/master/glumpy/library/build-spatial-filters.py) * * The math underlying each filter function was written from scratch, with * some algorithms coming from a number of different sources, including: * - https://en.wikipedia.org/wiki/Window_function * - https://en.wikipedia.org/wiki/Jinc * - http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h * - Vapoursynth plugin fmtconv (WTFPL Licensed), which is based on * dither plugin for avisynth from the same author: * https://github.com/vapoursynth/fmtconv/tree/master/src/fmtc * - Paul Heckbert's "zoom" * - XBMC: ConvolutionKernels.cpp etc. 
* - https://github.com/AviSynth/jinc-resize (only used to verify the math) */ #include #include "common.h" #include "context.h" bool pl_filter_function_eq(const struct pl_filter_function *a, const struct pl_filter_function *b) { if (!a || !b) return a == b; bool r = a->resizable == b->resizable && a->weight == b->weight && a->radius == b->radius; for (int i = 0; i < PL_FILTER_MAX_PARAMS; i++) { r &= a->tunable[i] == b->tunable[i]; if (a->tunable[i]) r &= a->params[i] == b->params[i]; } return r; } bool pl_filter_config_eq(const struct pl_filter_config *a, const struct pl_filter_config *b) { if (!a || !b) return a == b; return pl_filter_function_eq(a->kernel, b->kernel) && pl_filter_function_eq(a->window, b->window) && a->clamp == b->clamp && a->blur == b->blur && a->taper == b->taper && a->polar == b->polar; } double pl_filter_sample(const struct pl_filter_config *c, double x) { double radius = c->kernel->radius; // All filters are symmetric, and in particular only need to be defined // for [0, radius]. x = fabs(x); // Apply the blur and taper coefficients as needed double kx = c->blur > 0.0 ? x / c->blur : x; kx = kx <= c->taper ? 0.0 : (kx - c->taper) / (1.0 - c->taper / radius); // Return early for values outside of the kernel radius, since the functions // are not necessarily valid outside of this interval. No such check is // needed for the window, because it's always stretched to fit. if (kx > radius) return 0.0; double k = c->kernel->weight(c->kernel, kx); // Apply the optional windowing function if (c->window) k *= c->window->weight(c->window, x / radius * c->window->radius); return k < 0 ? (1 - c->clamp) * k : k; } // Calculate a single filter row of a 1D filter, for a given phase value / // subpixel offset `offset`. Writes exactly f->row_size values to *out. static void compute_row(struct pl_filter *f, double offset, float *out) { pl_assert(f->row_size > 0); double sum = 0; for (int i = 0; i < f->row_size; i++) { double x = offset - (i - f->row_size / 2.0 + 1); // Readjust the value range to account for a stretched kernel. x *= f->params.config.kernel->radius / f->radius; double weight = pl_filter_sample(&f->params.config, x); out[i] = weight; sum += weight; } // Normalize to preserve energy if (sum > 0.0) { for (int i = 0; i < f->row_size; i++) out[i] /= sum; } } static struct pl_filter_function *dupfilter(void *tactx, const struct pl_filter_function *f) { return f ? 
talloc_memdup(tactx, (void *)f, sizeof(*f)) : NULL; } const struct pl_filter *pl_filter_generate(struct pl_context *ctx, const struct pl_filter_params *params) { pl_assert(params); if (params->lut_entries <= 0 || !params->config.kernel) { pl_fatal(ctx, "Invalid params: missing lut_entries or config.kernel"); return NULL; } struct pl_filter *f = talloc_zero(ctx, struct pl_filter); f->params = *params; f->params.config.kernel = dupfilter(f, params->config.kernel); f->params.config.window = dupfilter(f, params->config.window); // Compute the required filter radius float radius = f->params.config.kernel->radius; f->radius = radius; if (params->filter_scale > 1.0) f->radius *= params->filter_scale; float *weights; if (params->config.polar) { // Compute a 1D array indexed by radius weights = talloc_array(f, float, params->lut_entries); f->radius_cutoff = 0.0; for (int i = 0; i < params->lut_entries; i++) { double x = radius * i / (params->lut_entries - 1); weights[i] = pl_filter_sample(&f->params.config, x); if (fabs(weights[i]) > params->cutoff) f->radius_cutoff = x; } } else { // Pick the most appropriate row size f->row_size = ceil(f->radius * 2.0); if (params->max_row_size && f->row_size > params->max_row_size) { pl_info(ctx, "Required filter size %d exceeds the maximum allowed " "size of %d. This may result in adverse effects (aliasing, " "or moiré artifacts).", f->row_size, params->max_row_size); f->row_size = params->max_row_size; f->insufficient = true; } f->row_stride = PL_ALIGN(f->row_size, params->row_stride_align); // Compute a 2D array indexed by the subpixel position weights = talloc_zero_array(f, float, params->lut_entries * f->row_stride); for (int i = 0; i < params->lut_entries; i++) { compute_row(f, i / (double)(params->lut_entries - 1), weights + f->row_stride * i); } } f->weights = weights; return f; } void pl_filter_free(const struct pl_filter **filter) { TA_FREEP((void **) filter); } const struct pl_named_filter_function *pl_find_named_filter_function(const char *name) { if (!name) return NULL; for (int i = 0; pl_named_filter_functions[i].function; i++) { if (strcmp(pl_named_filter_functions[i].name, name) == 0) return &pl_named_filter_functions[i]; } return NULL; } const struct pl_named_filter_config *pl_find_named_filter(const char *name) { if (!name) return NULL; for (int i = 0; pl_named_filters[i].filter; i++) { if (strcmp(pl_named_filters[i].name, name) == 0) return &pl_named_filters[i]; } return NULL; } // Built-in filter functions static double box(const struct pl_filter_function *f, double x) { return x < 0.5 ? 
1.0 : 0.0; } const struct pl_filter_function pl_filter_function_box = { .resizable = true, .weight = box, .radius = 1.0, }; static double triangle(const struct pl_filter_function *f, double x) { return 1.0 - x / f->radius; } const struct pl_filter_function pl_filter_function_triangle = { .resizable = true, .weight = triangle, .radius = 1.0, }; static double hann(const struct pl_filter_function *f, double x) { return 0.5 + 0.5 * cos(M_PI * x); } const struct pl_filter_function pl_filter_function_hann = { .weight = hann, .radius = 1.0, }; static double hamming(const struct pl_filter_function *f, double x) { return 0.54 + 0.46 * cos(M_PI * x); } const struct pl_filter_function pl_filter_function_hamming = { .weight = hamming, .radius = 1.0, }; static double welch(const struct pl_filter_function *f, double x) { return 1.0 - x * x; } const struct pl_filter_function pl_filter_function_welch = { .weight = welch, .radius = 1.0, }; static double bessel_i0(double x) { double s = 1.0; double y = x * x / 4.0; double t = y; int i = 2; while (t > 1e-12) { s += t; t *= y / (i * i); i += 1; } return s; } static double kaiser(const struct pl_filter_function *f, double x) { double alpha = fmax(f->params[0], 0.0); return bessel_i0(alpha * sqrt(1.0 - x * x)) / alpha; } const struct pl_filter_function pl_filter_function_kaiser = { .tunable = {true}, .weight = kaiser, .radius = 1.0, .params = {2.0}, }; static double blackman(const struct pl_filter_function *f, double x) { double a = f->params[0]; double a0 = (1 - a) / 2.0, a1 = 1 / 2.0, a2 = a / 2.0; x *= M_PI; return a0 + a1 * cos(x) + a2 * cos(2 * x); } const struct pl_filter_function pl_filter_function_blackman = { .tunable = {true}, .weight = blackman, .radius = 1.0, .params = {0.16}, }; static double gaussian(const struct pl_filter_function *f, double x) { return exp(-2.0 * x * x / f->params[0]); } const struct pl_filter_function pl_filter_function_gaussian = { .resizable = true, .tunable = {true}, .weight = gaussian, .radius = 2.0, .params = {1.0}, }; static double sinc(const struct pl_filter_function *f, double x) { if (x < 1e-8) return 1.0; x *= M_PI; return sin(x) / x; } const struct pl_filter_function pl_filter_function_sinc = { .resizable = true, .weight = sinc, .radius = 1.0, }; static double jinc(const struct pl_filter_function *f, double x) { if (x < 1e-8) return 1.0; x *= M_PI; return 2.0 * j1(x) / x; } const struct pl_filter_function pl_filter_function_jinc = { .resizable = true, .weight = jinc, .radius = 1.2196698912665045, // first zero }; static double sphinx(const struct pl_filter_function *f, double x) { if (x < 1e-8) return 1.0; x *= M_PI; return 3.0 * (sin(x) - x * cos(x)) / (x * x * x); } const struct pl_filter_function pl_filter_function_sphinx = { .resizable = true, .weight = sphinx, .radius = 1.4302966531242027, // first zero }; static double bcspline(const struct pl_filter_function *f, double x) { double b = f->params[0], c = f->params[1]; double p0 = (6.0 - 2.0 * b) / 6.0, p2 = (-18.0 + 12.0 * b + 6.0 * c) / 6.0, p3 = (12.0 - 9.0 * b - 6.0 * c) / 6.0, q0 = (8.0 * b + 24.0 * c) / 6.0, q1 = (-12.0 * b - 48.0 * c) / 6.0, q2 = (6.0 * b + 30.0 * c) / 6.0, q3 = (-b - 6.0 * c) / 6.0; // Needed to ensure the kernel is sanely scaled, i.e. 
bcspline(0.0) = 1.0 double scale = 1.0 / p0; if (x < 1.0) { return scale * (p0 + x * x * (p2 + x * p3)); } else if (x < 2.0) { return scale * (q0 + x * (q1 + x * (q2 + x * q3))); } return 0.0; } const struct pl_filter_function pl_filter_function_bcspline = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {0.5, 0.5}, }; const struct pl_filter_function pl_filter_function_catmull_rom = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {0.0, 0.5}, }; const struct pl_filter_function pl_filter_function_mitchell = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {1/3.0, 1/3.0}, }; const struct pl_filter_function pl_filter_function_robidoux = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {12 / (19 + 9 * M_SQRT2), 113 / (58 + 216 * M_SQRT2)}, }; const struct pl_filter_function pl_filter_function_robidouxsharp = { .tunable = {true, true}, .weight = bcspline, .radius = 2.0, .params = {6 / (13 + 7 * M_SQRT2), 7 / (2 + 12 * M_SQRT2)}, }; #define POW3(x) ((x) <= 0 ? 0 : (x) * (x) * (x)) static double bicubic(const struct pl_filter_function *f, double x) { return (1.0/6.0) * ( 1 * POW3(x + 2) - 4 * POW3(x + 1) + 6 * POW3(x + 0) - 4 * POW3(x - 1)); } const struct pl_filter_function pl_filter_function_bicubic = { .weight = bicubic, .radius = 2.0, }; static double spline16(const struct pl_filter_function *f, double x) { if (x < 1.0) { return ((x - 9.0/5.0 ) * x - 1.0/5.0 ) * x + 1.0; } else { return ((-1.0/3.0 * (x-1) + 4.0/5.0) * (x-1) - 7.0/15.0 ) * (x-1); } } const struct pl_filter_function pl_filter_function_spline16 = { .weight = spline16, .radius = 2.0, }; static double spline36(const struct pl_filter_function *f, double x) { if (x < 1.0) { return ((13.0/11.0 * x - 453.0/209.0) * x - 3.0/209.0) * x + 1.0; } else if (x < 2.0) { return ((-6.0/11.0 * (x-1) + 270.0/209.0) * (x-1) - 156.0/ 209.0) * (x-1); } else { return ((1.0/11.0 * (x-2) - 45.0/209.0) * (x-2) + 26.0/209.0) * (x-2); } } const struct pl_filter_function pl_filter_function_spline36 = { .weight = spline36, .radius = 3.0, }; static double spline64(const struct pl_filter_function *f, double x) { if (x < 1.0) { return ((49.0/41.0 * x - 6387.0/2911.0) * x - 3.0/2911.0) * x + 1.0; } else if (x < 2.0) { return ((-24.0/41.0 * (x-1) + 4032.0/2911.0) * (x-1) - 2328.0/2911.0) * (x-1); } else if (x < 3.0) { return ((6.0/41.0 * (x-2) - 1008.0/2911.0) * (x-2) + 582.0/2911.0) * (x-2); } else { return ((-1.0/41.0 * (x-3) + 168.0/2911.0) * (x-3) - 97.0/2911.0) * (x-3); } } const struct pl_filter_function pl_filter_function_spline64 = { .weight = spline64, .radius = 4.0, }; // Named filter functions const struct pl_named_filter_function pl_named_filter_functions[] = { {"box", &pl_filter_function_box}, {"dirichlet", &pl_filter_function_box}, // alias {"triangle", &pl_filter_function_triangle}, {"hann", &pl_filter_function_hann}, {"hanning", &pl_filter_function_hann}, // alias {"hamming", &pl_filter_function_hamming}, {"welch", &pl_filter_function_welch}, {"kaiser", &pl_filter_function_kaiser}, {"blackman", &pl_filter_function_blackman}, {"gaussian", &pl_filter_function_gaussian}, {"sinc", &pl_filter_function_sinc}, {"jinc", &pl_filter_function_jinc}, {"sphinx", &pl_filter_function_sphinx}, {"bcspline", &pl_filter_function_bcspline}, {"hermite", &pl_filter_function_bcspline}, // alias {"catmull_rom", &pl_filter_function_catmull_rom}, {"mitchell", &pl_filter_function_mitchell}, {"robidoux", &pl_filter_function_robidoux}, {"robidouxsharp", 
&pl_filter_function_robidouxsharp}, {"bicubic", &pl_filter_function_bicubic}, {"spline16", &pl_filter_function_spline16}, {"spline36", &pl_filter_function_spline36}, {"spline64", &pl_filter_function_spline64}, {0}, }; // Built-in filter function presets const struct pl_filter_config pl_filter_spline16 = { .kernel = &pl_filter_function_spline16, }; const struct pl_filter_config pl_filter_spline36 = { .kernel = &pl_filter_function_spline36, }; const struct pl_filter_config pl_filter_spline64 = { .kernel = &pl_filter_function_spline64, }; const struct pl_filter_config pl_filter_box = { .kernel = &pl_filter_function_box, }; const struct pl_filter_config pl_filter_triangle = { .kernel = &pl_filter_function_triangle, }; const struct pl_filter_config pl_filter_gaussian = { .kernel = &pl_filter_function_gaussian, }; // Sinc configured to three taps static const struct pl_filter_function sinc3 = { .resizable = true, .weight = sinc, .radius = 3.0, }; const struct pl_filter_config pl_filter_sinc = { .kernel = &sinc3, }; const struct pl_filter_config pl_filter_lanczos = { .kernel = &sinc3, .window = &pl_filter_function_sinc, }; const struct pl_filter_config pl_filter_ginseng = { .kernel = &sinc3, .window = &pl_filter_function_jinc, }; // Jinc configured to three taps static const struct pl_filter_function jinc3 = { .resizable = true, .weight = jinc, .radius = 3.2383154841662362, // third zero }; const struct pl_filter_config pl_filter_ewa_jinc = { .kernel = &jinc3, .polar = true, }; const struct pl_filter_config pl_filter_ewa_lanczos = { .kernel = &jinc3, .window = &pl_filter_function_jinc, .polar = true, }; const struct pl_filter_config pl_filter_ewa_ginseng = { .kernel = &jinc3, .window = &pl_filter_function_sinc, .polar = true, }; const struct pl_filter_config pl_filter_ewa_hann = { .kernel = &jinc3, .window = &pl_filter_function_hann, .polar = true, }; const struct pl_filter_config pl_filter_haasnsoft = { .kernel = &jinc3, .window = &pl_filter_function_hann, // The blur is tuned to equal out orthogonal and diagonal contributions // on a regular grid. This has the effect of almost completely killing // aliasing. 
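    // (A blur factor > 1.0 widens the kernel: pl_filter_sample divides the
    // sample position by `blur` before evaluating the weight, so 1.11
    // stretches the effective radius by roughly 11%.)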
.blur = 1.11, .polar = true, }; // Spline family const struct pl_filter_config pl_filter_bicubic = { .kernel = &pl_filter_function_bicubic, }; const struct pl_filter_config pl_filter_catmull_rom = { .kernel = &pl_filter_function_catmull_rom, }; const struct pl_filter_config pl_filter_mitchell = { .kernel = &pl_filter_function_mitchell, }; const struct pl_filter_config pl_filter_robidoux = { .kernel = &pl_filter_function_robidoux, }; const struct pl_filter_config pl_filter_robidouxsharp = { .kernel = &pl_filter_function_robidouxsharp, }; const struct pl_filter_config pl_filter_ewa_robidoux = { .kernel = &pl_filter_function_robidoux, .polar = true, }; const struct pl_filter_config pl_filter_ewa_robidouxsharp = { .kernel = &pl_filter_function_robidouxsharp, .polar = true, }; // Named filter configs const struct pl_named_filter_config pl_named_filters[] = { {"spline16", &pl_filter_spline16}, {"spline36", &pl_filter_spline36}, {"spline64", &pl_filter_spline64}, {"box", &pl_filter_box}, {"nearest", &pl_filter_box}, // alias {"triangle", &pl_filter_triangle}, {"bilinear", &pl_filter_triangle}, // alias {"gaussian", &pl_filter_gaussian}, {"sinc", &pl_filter_sinc}, {"lanczos", &pl_filter_lanczos}, {"ginseng", &pl_filter_ginseng}, {"ewa_jinc", &pl_filter_ewa_jinc}, {"ewa_lanczos", &pl_filter_ewa_lanczos}, {"ewa_ginseng", &pl_filter_ewa_ginseng}, {"ewa_hann", &pl_filter_ewa_hann}, {"ewa_hanning", &pl_filter_ewa_hann}, // alias {"haasnsoft", &pl_filter_haasnsoft}, {"bicubic", &pl_filter_bicubic}, {"catmull_rom", &pl_filter_catmull_rom}, {"mitchell", &pl_filter_mitchell}, {"robidoux", &pl_filter_robidoux}, {"robidouxsharp", &pl_filter_robidouxsharp}, {"ewa_robidoux", &pl_filter_ewa_robidoux}, {"ewa_robidouxsharp", &pl_filter_ewa_robidouxsharp}, {0}, }; libplacebo-0.4.0/src/gpu.c000066400000000000000000001314631324021332500153400ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include "context.h" #include "shaders.h" #include "gpu.h" int pl_optimal_transfer_stride(const struct pl_gpu *gpu, int dimension) { return PL_ALIGN2(dimension, gpu->limits.align_tex_xfer_stride); } void pl_gpu_destroy(const struct pl_gpu *gpu) { if (!gpu) return; gpu->impl->destroy(gpu); } void pl_gpu_print_info(const struct pl_gpu *gpu, enum pl_log_level lev) { PL_MSG(gpu, lev, "GPU information:"); PL_MSG(gpu, lev, " GLSL version: %d%s", gpu->glsl.version, gpu->glsl.vulkan ? " (vulkan)" : gpu->glsl.gles ? 
" es" : ""); PL_MSG(gpu, lev, " Capabilities: 0x%x", (unsigned int) gpu->caps); PL_MSG(gpu, lev, " Limits:"); #define LOG(fmt, field) \ PL_MSG(gpu, lev, " %-26s " fmt, #field ":", gpu->limits.field) LOG("%d", max_tex_1d_dim); LOG("%d", max_tex_2d_dim); LOG("%d", max_tex_3d_dim); LOG("%zu", max_pushc_size); LOG("%zu", max_xfer_size); LOG("%zu", max_ubo_size); LOG("%zu", max_ssbo_size); LOG("%d", min_gather_offset); LOG("%d", max_gather_offset); if (gpu->caps & PL_GPU_CAP_COMPUTE) { LOG("%zu", max_shmem_size); LOG("%d", max_group_threads); LOG("%d", max_group_size[0]); LOG("%d", max_group_size[1]); LOG("%d", max_group_size[2]); LOG("%d", max_dispatch[0]); LOG("%d", max_dispatch[1]); LOG("%d", max_dispatch[2]); } LOG("%d", align_tex_xfer_stride); LOG("%zu", align_tex_xfer_offset); #undef LOG } static int cmp_fmt(const void *pa, const void *pb) { const struct pl_fmt *a = *(const struct pl_fmt **)pa; const struct pl_fmt *b = *(const struct pl_fmt **)pb; // Always prefer non-opaque formats if (a->opaque != b->opaque) return PL_CMP(a->opaque, b->opaque); // Always prefer non-emulated formats if (a->emulated != b->emulated) return PL_CMP(a->emulated, b->emulated); int ca = __builtin_popcount(a->caps), cb = __builtin_popcount(b->caps); if (ca != cb) return -PL_CMP(ca, cb); // invert to sort higher values first // If the population count is the same but the caps are different, prefer // the caps with a "lower" value (which tend to be more fundamental caps) if (a->caps != b->caps) return PL_CMP(a->caps, b->caps); // If the capabilities are equal, sort based on the component attributes for (int i = 0; i < PL_ARRAY_SIZE(a->component_depth); i++) { int da = a->component_depth[i], db = b->component_depth[i]; if (da != db) return PL_CMP(da, db); int ha = a->host_bits[i], hb = b->host_bits[i]; if (ha != hb) return PL_CMP(ha, hb); int oa = a->sample_order[i], ob = b->sample_order[i]; if (oa != ob) return PL_CMP(oa, ob); } // Fall back to sorting by the name (for stability) return strcmp(a->name, b->name); } void pl_gpu_sort_formats(struct pl_gpu *gpu) { qsort(gpu->formats, gpu->num_formats, sizeof(struct pl_fmt *), cmp_fmt); } void pl_gpu_print_formats(const struct pl_gpu *gpu, enum pl_log_level lev) { if (!pl_msg_test(gpu->ctx, lev)) return; PL_MSG(gpu, lev, "GPU texture formats:"); PL_MSG(gpu, lev, " %-10s %-6s %-6s %-4s %-4s %-13s %-13s %-10s %-10s", "NAME", "TYPE", "CAPS", "SIZE", "COMP", "DEPTH", "BITS", "GLSL_TYPE", "GLSL_FMT"); for (int n = 0; n < gpu->num_formats; n++) { const struct pl_fmt *fmt = gpu->formats[n]; static const char *types[] = { [PL_FMT_UNKNOWN] = "UNKNOWN", [PL_FMT_UNORM] = "UNORM", [PL_FMT_SNORM] = "SNORM", [PL_FMT_UINT] = "UINT", [PL_FMT_SINT] = "SINT", [PL_FMT_FLOAT] = "FLOAT", }; static const char idx_map[4] = {'R', 'G', 'B', 'A'}; char indices[4] = {' ', ' ', ' ', ' '}; if (!fmt->opaque) { for (int i = 0; i < fmt->num_components; i++) indices[i] = idx_map[fmt->sample_order[i]]; } #define IDX4(f) (f)[0], (f)[1], (f)[2], (f)[3] PL_MSG(gpu, lev, " %-10s %-6s 0x%-4x %-4zu %c%c%c%c " "{%-2d %-2d %-2d %-2d} {%-2d %-2d %-2d %-2d} %-10s %-10s", fmt->name, types[fmt->type], (unsigned int) fmt->caps, fmt->texel_size, IDX4(indices), IDX4(fmt->component_depth), IDX4(fmt->host_bits), PL_DEF(fmt->glsl_type, ""), PL_DEF(fmt->glsl_format, "")); #undef IDX4 } } bool pl_fmt_is_ordered(const struct pl_fmt *fmt) { bool ret = !fmt->opaque; for (int i = 0; i < fmt->num_components; i++) ret &= fmt->sample_order[i] == i; return ret; } struct glsl_fmt { enum pl_fmt_type type; int num_components; int 
depth[4]; const char *glsl_format; }; // List taken from the GLSL specification. (Yes, GLSL supports only exactly // these formats with exactly these names) static const struct glsl_fmt pl_glsl_fmts[] = { {PL_FMT_FLOAT, 1, {16}, "r16f"}, {PL_FMT_FLOAT, 1, {32}, "r32f"}, {PL_FMT_FLOAT, 2, {16, 16}, "rg16f"}, {PL_FMT_FLOAT, 2, {32, 32}, "rg32f"}, {PL_FMT_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"}, {PL_FMT_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"}, {PL_FMT_FLOAT, 3, {11, 11, 10}, "r11f_g11f_b10f"}, {PL_FMT_UNORM, 1, {8}, "r8"}, {PL_FMT_UNORM, 1, {16}, "r16"}, {PL_FMT_UNORM, 2, {8, 8}, "rg8"}, {PL_FMT_UNORM, 2, {16, 16}, "rg16"}, {PL_FMT_UNORM, 4, {8, 8, 8, 8}, "rgba8"}, {PL_FMT_UNORM, 4, {16, 16, 16, 16}, "rgba16"}, {PL_FMT_UNORM, 4, {10, 10, 10, 2}, "rgb10_a2"}, {PL_FMT_SNORM, 1, {8}, "r8_snorm"}, {PL_FMT_SNORM, 1, {16}, "r16_snorm"}, {PL_FMT_SNORM, 2, {8, 8}, "rg8_snorm"}, {PL_FMT_SNORM, 2, {16, 16}, "rg16_snorm"}, {PL_FMT_SNORM, 4, {8, 8, 8, 8}, "rgba8_snorm"}, {PL_FMT_SNORM, 4, {16, 16, 16, 16}, "rgba16_snorm"}, {PL_FMT_UINT, 1, {8}, "r8ui"}, {PL_FMT_UINT, 1, {16}, "r16ui"}, {PL_FMT_UINT, 1, {32}, "r32ui"}, {PL_FMT_UINT, 2, {8, 8}, "rg8ui"}, {PL_FMT_UINT, 2, {16, 16}, "rg16ui"}, {PL_FMT_UINT, 2, {32, 32}, "rg32ui"}, {PL_FMT_UINT, 4, {8, 8, 8, 8}, "rgba8ui"}, {PL_FMT_UINT, 4, {16, 16, 16, 16}, "rgba16ui"}, {PL_FMT_UINT, 4, {32, 32, 32, 32}, "rgba32ui"}, {PL_FMT_UINT, 4, {10, 10, 10, 2}, "rgb10_a2ui"}, {PL_FMT_SINT, 1, {8}, "r8i"}, {PL_FMT_SINT, 1, {16}, "r16i"}, {PL_FMT_SINT, 1, {32}, "r32i"}, {PL_FMT_SINT, 2, {8, 8}, "rg8i"}, {PL_FMT_SINT, 2, {16, 16}, "rg16i"}, {PL_FMT_SINT, 2, {32, 32}, "rg32i"}, {PL_FMT_SINT, 4, {8, 8, 8, 8}, "rgba8i"}, {PL_FMT_SINT, 4, {16, 16, 16, 16}, "rgba16i"}, {PL_FMT_SINT, 4, {32, 32, 32, 32}, "rgba32i"}, }; const char *pl_fmt_glsl_format(const struct pl_fmt *fmt) { if (fmt->opaque) return NULL; int components = fmt->num_components; if (fmt->emulated && components == 3) components = 4; for (int n = 0; n < PL_ARRAY_SIZE(pl_glsl_fmts); n++) { const struct glsl_fmt *gfmt = &pl_glsl_fmts[n]; if (fmt->type != gfmt->type) continue; if (components != gfmt->num_components) continue; // The component order is irrelevant, so we need to sort the depth // based on the component's index int depth[4] = {0}; for (int i = 0; i < fmt->num_components; i++) depth[fmt->sample_order[i]] = fmt->component_depth[i]; // Copy over any emulated components for (int i = fmt->num_components; i < components; i++) depth[i] = gfmt->depth[i]; for (int i = 0; i < PL_ARRAY_SIZE(depth); i++) { if (depth[i] != gfmt->depth[i]) goto next_fmt; } return gfmt->glsl_format; next_fmt: ; // equivalent to `continue` } return NULL; } const struct pl_fmt *pl_find_fmt(const struct pl_gpu *gpu, enum pl_fmt_type type, int num_components, int min_depth, int host_bits, enum pl_fmt_caps caps) { for (int n = 0; n < gpu->num_formats; n++) { const struct pl_fmt *fmt = gpu->formats[n]; if (fmt->type != type || fmt->num_components != num_components) continue; if ((fmt->caps & caps) != caps) continue; // When specifying some particular host representation, ensure the // format is non-opaque, ordered and unpadded if (host_bits && fmt->opaque) continue; if (host_bits && fmt->texel_size * 8 != host_bits * num_components) continue; if (host_bits && !pl_fmt_is_ordered(fmt)) continue; for (int i = 0; i < fmt->num_components; i++) { if (fmt->component_depth[i] < min_depth) goto next_fmt; if (host_bits && fmt->host_bits[i] != host_bits) goto next_fmt; } return fmt; next_fmt: ; // equivalent to `continue` } // ran out of formats 
PL_DEBUG(gpu, "No matching format found"); return NULL; } const struct pl_fmt *pl_find_vertex_fmt(const struct pl_gpu *gpu, enum pl_fmt_type type, int comps) { static const size_t sizes[] = { [PL_FMT_FLOAT] = sizeof(float), [PL_FMT_UNORM] = sizeof(unsigned), [PL_FMT_UINT] = sizeof(unsigned), [PL_FMT_SNORM] = sizeof(int), [PL_FMT_SINT] = sizeof(int), }; return pl_find_fmt(gpu, type, comps, 0, 8 * sizes[type], PL_FMT_CAP_VERTEX); } const struct pl_fmt *pl_find_named_fmt(const struct pl_gpu *gpu, const char *name) { if (!name) return NULL; for (int i = 0; i < gpu->num_formats; i++) { const struct pl_fmt *fmt = gpu->formats[i]; if (strcmp(name, fmt->name) == 0) return fmt; } // ran out of formats return NULL; } const struct pl_tex *pl_tex_create(const struct pl_gpu *gpu, const struct pl_tex_params *params) { switch (pl_tex_params_dimension(*params)) { case 1: pl_assert(params->w > 0); pl_assert(params->w <= gpu->limits.max_tex_1d_dim); pl_assert(!params->renderable); break; case 2: pl_assert(params->w > 0 && params->h > 0); pl_assert(params->w <= gpu->limits.max_tex_2d_dim); pl_assert(params->h <= gpu->limits.max_tex_2d_dim); break; case 3: pl_assert(params->w > 0 && params->h > 0 && params->d > 0); pl_assert(params->w <= gpu->limits.max_tex_3d_dim); pl_assert(params->h <= gpu->limits.max_tex_3d_dim); pl_assert(params->d <= gpu->limits.max_tex_3d_dim); pl_assert(!params->renderable); break; } const struct pl_fmt *fmt = params->format; pl_assert(fmt); pl_assert(!params->sampleable || fmt->caps & PL_FMT_CAP_SAMPLEABLE); pl_assert(!params->renderable || fmt->caps & PL_FMT_CAP_RENDERABLE); pl_assert(!params->storable || fmt->caps & PL_FMT_CAP_STORABLE); pl_assert(!params->blit_src || fmt->caps & PL_FMT_CAP_BLITTABLE); pl_assert(!params->blit_dst || fmt->caps & PL_FMT_CAP_BLITTABLE); pl_assert(params->sample_mode != PL_TEX_SAMPLE_LINEAR || fmt->caps & PL_FMT_CAP_LINEAR); return gpu->impl->tex_create(gpu, params); } static bool pl_tex_params_superset(struct pl_tex_params a, struct pl_tex_params b) { return a.w == b.w && a.h == b.h && a.d == b.d && a.format == b.format && a.sample_mode == b.sample_mode && a.address_mode == b.address_mode && (a.sampleable || !b.sampleable) && (a.renderable || !b.renderable) && (a.storable || !b.storable) && (a.blit_src || !b.blit_src) && (a.blit_dst || !b.blit_dst) && (a.host_writable || !b.host_writable) && (a.host_readable || !b.host_readable); } bool pl_tex_recreate(const struct pl_gpu *gpu, const struct pl_tex **tex, const struct pl_tex_params *params) { if (*tex && pl_tex_params_superset((*tex)->params, *params)) return true; PL_INFO(gpu, "(Re)creating %dx%dx%d texture", params->w, params->h, params->d); pl_tex_destroy(gpu, tex); *tex = pl_tex_create(gpu, params); return !!*tex; } void pl_tex_destroy(const struct pl_gpu *gpu, const struct pl_tex **tex) { if (!*tex) return; gpu->impl->tex_destroy(gpu, *tex); *tex = NULL; } void pl_tex_clear(const struct pl_gpu *gpu, const struct pl_tex *dst, const float color[4]) { pl_assert(dst->params.blit_dst); pl_tex_invalidate(gpu, dst); gpu->impl->tex_clear(gpu, dst, color); } void pl_tex_invalidate(const struct pl_gpu *gpu, const struct pl_tex *tex) { gpu->impl->tex_invalidate(gpu, tex); } static void strip_coords(const struct pl_tex *tex, struct pl_rect3d *rc) { if (!tex->params.d) { rc->z0 = 0; rc->z1 = 1; } if (!tex->params.h) { rc->y0 = 0; rc->y1 = 1; } } void pl_tex_blit(const struct pl_gpu *gpu, const struct pl_tex *dst, const struct pl_tex *src, struct pl_rect3d dst_rc, struct pl_rect3d src_rc) { const struct pl_fmt 
*src_fmt = src->params.format; const struct pl_fmt *dst_fmt = dst->params.format; pl_assert(src_fmt->texel_size == dst_fmt->texel_size); pl_assert((src_fmt->type == PL_FMT_UINT) == (dst_fmt->type == PL_FMT_UINT)); pl_assert((src_fmt->type == PL_FMT_SINT) == (dst_fmt->type == PL_FMT_SINT)); pl_assert(src->params.blit_src); pl_assert(dst->params.blit_dst); pl_assert(src_rc.x0 >= 0 && src_rc.x0 < src->params.w); pl_assert(src_rc.x1 > 0 && src_rc.x1 <= src->params.w); pl_assert(dst_rc.x0 >= 0 && dst_rc.x0 < dst->params.w); pl_assert(dst_rc.x1 > 0 && dst_rc.x1 <= dst->params.w); if (src->params.h) { pl_assert(dst->params.h); pl_assert(src_rc.y0 >= 0 && src_rc.y0 < src->params.h); pl_assert(src_rc.y1 > 0 && src_rc.y1 <= src->params.h); } if (dst->params.h) { pl_assert(dst_rc.y0 >= 0 && dst_rc.y0 < dst->params.h); pl_assert(dst_rc.y1 > 0 && dst_rc.y1 <= dst->params.h); } if (src->params.d) { pl_assert(dst->params.d); pl_assert(src_rc.z0 >= 0 && src_rc.z0 < src->params.d); pl_assert(src_rc.z1 > 0 && src_rc.z1 <= src->params.d); } if (dst->params.d) { pl_assert(dst_rc.z0 >= 0 && dst_rc.z0 < dst->params.d); pl_assert(dst_rc.z1 > 0 && dst_rc.z1 <= dst->params.d); } strip_coords(src, &src_rc); strip_coords(dst, &dst_rc); struct pl_rect3d full = {0, 0, 0, dst->params.w, dst->params.h, dst->params.d}; strip_coords(dst, &full); struct pl_rect3d rcnorm = dst_rc; pl_rect3d_normalize(&rcnorm); if (pl_rect3d_eq(rcnorm, full)) pl_tex_invalidate(gpu, dst); gpu->impl->tex_blit(gpu, dst, src, dst_rc, src_rc); } size_t pl_tex_transfer_size(const struct pl_tex_transfer_params *par) { const struct pl_tex *tex = par->tex; int texels; switch (pl_tex_params_dimension(tex->params)) { case 1: texels = pl_rect_w(par->rc); break; case 2: texels = pl_rect_h(par->rc) * par->stride_w; break; case 3: texels = pl_rect_d(par->rc) * par->stride_w * par->stride_h; break; } return texels * tex->params.format->texel_size; } static void fix_tex_transfer(const struct pl_gpu *gpu, struct pl_tex_transfer_params *params) { const struct pl_tex *tex = params->tex; struct pl_rect3d rc = params->rc; // Infer the default values if (!rc.x0 && !rc.x1) rc.x1 = tex->params.w; if (!rc.y0 && !rc.y1) rc.y1 = tex->params.h; if (!rc.z0 && !rc.z1) rc.z1 = tex->params.d; if (!params->stride_w) params->stride_w = tex->params.w; if (!params->stride_h) params->stride_h = tex->params.h; // Sanitize superfluous coordinates for the benefit of the GPU strip_coords(tex, &rc); if (!tex->params.w) params->stride_w = 1; if (!tex->params.h) params->stride_h = 1; params->rc = rc; // Check the parameters for sanity #ifndef NDEBUG switch (pl_tex_params_dimension(tex->params)) { case 3: pl_assert(rc.z1 > rc.z0); pl_assert(rc.z0 >= 0 && rc.z0 < tex->params.d); pl_assert(rc.z1 > 0 && rc.z1 <= tex->params.d); pl_assert(params->stride_h >= pl_rect_h(rc)); // fall through case 2: pl_assert(rc.y1 > rc.y0); pl_assert(rc.y0 >= 0 && rc.y0 < tex->params.h); pl_assert(rc.y1 > 0 && rc.y1 <= tex->params.h); pl_assert(params->stride_w >= pl_rect_w(rc)); // fall through case 1: pl_assert(rc.x1 > rc.x0); pl_assert(rc.x0 >= 0 && rc.x0 < tex->params.w); pl_assert(rc.x1 > 0 && rc.x1 <= tex->params.w); break; } pl_assert(!params->buf ^ !params->ptr); // exactly one if (params->buf) { const struct pl_buf *buf = params->buf; size_t size = pl_tex_transfer_size(params); pl_assert(params->buf_offset == PL_ALIGN2(params->buf_offset, 4)); pl_assert(params->buf_offset + size <= buf->params.size); } #endif } bool pl_tex_upload(const struct pl_gpu *gpu, const struct pl_tex_transfer_params *params) 
{ const struct pl_tex *tex = params->tex; pl_assert(tex); pl_assert(tex->params.host_writable); struct pl_tex_transfer_params fixed = *params; fix_tex_transfer(gpu, &fixed); return gpu->impl->tex_upload(gpu, &fixed); } bool pl_tex_download(const struct pl_gpu *gpu, const struct pl_tex_transfer_params *params) { const struct pl_tex *tex = params->tex; pl_assert(tex); pl_assert(tex->params.host_readable); struct pl_tex_transfer_params fixed = *params; fix_tex_transfer(gpu, &fixed); return gpu->impl->tex_download(gpu, &fixed); } const struct pl_buf *pl_buf_create(const struct pl_gpu *gpu, const struct pl_buf_params *params) { switch (params->type) { case PL_BUF_TEX_TRANSFER: pl_assert(gpu->limits.max_xfer_size); pl_assert(params->size <= gpu->limits.max_xfer_size); break; case PL_BUF_UNIFORM: pl_assert(gpu->limits.max_ubo_size); pl_assert(params->size <= gpu->limits.max_ubo_size); break; case PL_BUF_STORAGE: pl_assert(gpu->limits.max_ssbo_size); pl_assert(params->size <= gpu->limits.max_ssbo_size); break; case PL_BUF_TEXEL_UNIFORM: { pl_assert(params->format); pl_assert(params->format->caps & PL_FMT_CAP_TEXEL_UNIFORM); size_t limit = gpu->limits.max_buffer_texels * params->format->texel_size; pl_assert(params->size <= limit); break; } case PL_BUF_TEXEL_STORAGE: { pl_assert(params->format); pl_assert(params->format->caps & PL_FMT_CAP_TEXEL_STORAGE); size_t limit = gpu->limits.max_buffer_texels * params->format->texel_size; pl_assert(params->size <= limit); break; } case PL_BUF_PRIVATE: break; default: abort(); } const struct pl_buf *buf = gpu->impl->buf_create(gpu, params); if (buf) pl_assert(buf->data || !params->host_mapped); return buf; } void pl_buf_destroy(const struct pl_gpu *gpu, const struct pl_buf **buf) { if (!*buf) return; gpu->impl->buf_destroy(gpu, *buf); *buf = NULL; } void pl_buf_write(const struct pl_gpu *gpu, const struct pl_buf *buf, size_t buf_offset, const void *data, size_t size) { pl_assert(buf->params.host_writable); pl_assert(buf_offset + size <= buf->params.size); pl_assert(buf_offset == PL_ALIGN2(buf_offset, 4)); gpu->impl->buf_write(gpu, buf, buf_offset, data, size); } bool pl_buf_read(const struct pl_gpu *gpu, const struct pl_buf *buf, size_t buf_offset, void *dest, size_t size) { pl_assert(buf->params.host_readable); pl_assert(buf_offset + size <= buf->params.size); pl_assert(buf_offset == PL_ALIGN2(buf_offset, 4)); return gpu->impl->buf_read(gpu, buf, buf_offset, dest, size); } bool pl_buf_poll(const struct pl_gpu *gpu, const struct pl_buf *buf, uint64_t t) { return gpu->impl->buf_poll ? 
gpu->impl->buf_poll(gpu, buf, t) : false; } size_t pl_var_type_size(enum pl_var_type type) { switch (type) { case PL_VAR_SINT: return sizeof(int); case PL_VAR_UINT: return sizeof(unsigned int); case PL_VAR_FLOAT: return sizeof(float); default: abort(); } } #define MAX_DIM 4 const char *pl_var_glsl_type_name(struct pl_var var) { static const char *types[PL_VAR_TYPE_COUNT][MAX_DIM+1][MAX_DIM+1] = { // float vectors [PL_VAR_FLOAT][1][1] = "float", [PL_VAR_FLOAT][1][2] = "vec2", [PL_VAR_FLOAT][1][3] = "vec3", [PL_VAR_FLOAT][1][4] = "vec4", // float matrices [PL_VAR_FLOAT][2][2] = "mat2", [PL_VAR_FLOAT][2][3] = "mat2x3", [PL_VAR_FLOAT][2][4] = "mat2x4", [PL_VAR_FLOAT][3][2] = "mat3x2", [PL_VAR_FLOAT][3][3] = "mat3", [PL_VAR_FLOAT][3][4] = "mat3x4", [PL_VAR_FLOAT][4][2] = "mat4x2", [PL_VAR_FLOAT][4][3] = "mat4x3", [PL_VAR_FLOAT][4][4] = "mat4", // integer vectors [PL_VAR_SINT][1][1] = "int", [PL_VAR_SINT][1][2] = "ivec2", [PL_VAR_SINT][1][3] = "ivec3", [PL_VAR_SINT][1][4] = "ivec4", // unsigned integer vectors [PL_VAR_UINT][1][1] = "uint", [PL_VAR_UINT][1][2] = "uvec2", [PL_VAR_UINT][1][3] = "uvec3", [PL_VAR_UINT][1][4] = "uvec4", }; if (var.dim_v > MAX_DIM || var.dim_m > MAX_DIM) return NULL; return types[var.type][var.dim_m][var.dim_v]; } #define PL_VAR(TYPE, NAME, M, V) \ struct pl_var pl_var_##NAME(const char *name) { \ return (struct pl_var) { \ .name = name, \ .type = PL_VAR_##TYPE, \ .dim_m = M, \ .dim_v = V, \ .dim_a = 1, \ }; \ } PL_VAR(UINT, uint, 1, 1); PL_VAR(FLOAT, float, 1, 1); PL_VAR(FLOAT, vec2, 1, 2); PL_VAR(FLOAT, vec3, 1, 3); PL_VAR(FLOAT, vec4, 1, 4); PL_VAR(FLOAT, mat2, 2, 2); PL_VAR(FLOAT, mat3, 3, 3); PL_VAR(FLOAT, mat4, 4, 4); #undef PL_VAR struct pl_var pl_var_from_fmt(const struct pl_fmt *fmt, const char *name) { static const enum pl_var_type vartypes[] = { [PL_FMT_FLOAT] = PL_VAR_FLOAT, [PL_FMT_UNORM] = PL_VAR_FLOAT, [PL_FMT_SNORM] = PL_VAR_FLOAT, [PL_FMT_UINT] = PL_VAR_UINT, [PL_FMT_SINT] = PL_VAR_SINT, }; pl_assert(fmt->type < PL_ARRAY_SIZE(vartypes)); return (struct pl_var) { .type = vartypes[fmt->type], .name = name, .dim_v = fmt->num_components, .dim_m = 1, .dim_a = 1, }; } struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var) { size_t col_size = pl_var_type_size(var->type) * var->dim_v; return (struct pl_var_layout) { .offset = offset, .stride = col_size, .size = col_size * var->dim_m * var->dim_a, }; } struct pl_var_layout pl_buf_uniform_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var) { if (gpu->limits.max_ubo_size) { return gpu->impl->buf_uniform_layout(gpu, offset, var); } else { return (struct pl_var_layout) {0}; } } struct pl_var_layout pl_buf_storage_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var) { if (gpu->limits.max_ssbo_size) { return gpu->impl->buf_storage_layout(gpu, offset, var); } else { return (struct pl_var_layout) {0}; } } struct pl_var_layout pl_push_constant_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var) { if (gpu->limits.max_pushc_size) { return gpu->impl->push_constant_layout(gpu, offset, var); } else { return (struct pl_var_layout) {0}; } } bool pl_buf_desc_append(void *tactx, const struct pl_gpu *gpu, struct pl_desc *buf_desc, struct pl_var_layout *out_layout, const struct pl_var new_var) { struct pl_buffer_var bv = { .var = new_var }; size_t cur_size = pl_buf_desc_size(buf_desc); switch (buf_desc->type) { case PL_DESC_BUF_UNIFORM: bv.layout = pl_buf_uniform_layout(gpu, cur_size, &new_var); if (bv.layout.offset + bv.layout.size > 
gpu->limits.max_ubo_size) return false; break; case PL_DESC_BUF_STORAGE: bv.layout = pl_buf_storage_layout(gpu, cur_size, &new_var); if (bv.layout.offset + bv.layout.size > gpu->limits.max_ssbo_size) return false; break; default: abort(); } *out_layout = bv.layout; TARRAY_APPEND(tactx, buf_desc->buffer_vars, buf_desc->num_buffer_vars, bv); return true; } size_t pl_buf_desc_size(const struct pl_desc *buf_desc) { if (!buf_desc->num_buffer_vars) return 0; const struct pl_buffer_var *last; last = &buf_desc->buffer_vars[buf_desc->num_buffer_vars - 1]; return last->layout.offset + last->layout.size; } void memcpy_layout(void *dst_p, struct pl_var_layout dst_layout, const void *src_p, struct pl_var_layout src_layout) { uintptr_t src = (uintptr_t) src_p + src_layout.offset; uintptr_t dst = (uintptr_t) dst_p + dst_layout.offset; if (src_layout.stride == dst_layout.stride) { memcpy((void *) dst, (const void *) src, src_layout.size); return; } size_t stride = PL_MIN(src_layout.stride, dst_layout.stride); uintptr_t end = src + src_layout.size; while (src < end) { memcpy((void *) dst, (const void *) src, stride); src += src_layout.stride; dst += dst_layout.stride; } } int pl_desc_namespace(const struct pl_gpu *gpu, enum pl_desc_type type) { int ret = gpu->impl->desc_namespace(gpu, type); pl_assert(ret >= 0 && ret < PL_DESC_TYPE_COUNT); return ret; } const char *pl_desc_access_glsl_name(enum pl_desc_access mode) { switch (mode) { case PL_DESC_ACCESS_READWRITE: return ""; case PL_DESC_ACCESS_READONLY: return "readonly"; case PL_DESC_ACCESS_WRITEONLY: return "writeonly"; default: abort(); } } const struct pl_pass *pl_pass_create(const struct pl_gpu *gpu, const struct pl_pass_params *params) { pl_assert(params->glsl_shader); switch(params->type) { case PL_PASS_RASTER: pl_assert(params->vertex_shader); for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_vertex_attrib va = params->vertex_attribs[i]; pl_assert(va.name); pl_assert(va.fmt); pl_assert(va.fmt->caps & PL_FMT_CAP_VERTEX); pl_assert(va.offset + va.fmt->texel_size <= params->vertex_stride); } const struct pl_fmt *target_fmt = params->target_dummy.params.format; pl_assert(target_fmt); pl_assert(target_fmt->caps & PL_FMT_CAP_RENDERABLE); pl_assert(!params->blend_params || target_fmt->caps & PL_FMT_CAP_BLENDABLE); break; case PL_PASS_COMPUTE: pl_assert(gpu->caps & PL_GPU_CAP_COMPUTE); break; default: abort(); } for (int i = 0; i < params->num_variables; i++) { pl_assert(gpu->caps & PL_GPU_CAP_INPUT_VARIABLES); struct pl_var var = params->variables[i]; pl_assert(var.name); pl_assert(pl_var_glsl_type_name(var)); } for (int i = 0; i < params->num_descriptors; i++) { struct pl_desc desc = params->descriptors[i]; pl_assert(desc.name); // TODO: enforce disjoint bindings if possible? 
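        // (Added clarification, an interpretation of the TODO above rather
        // than an API guarantee: "disjoint" would mean that no two
        // descriptors sharing a namespace — as reported by
        // pl_desc_namespace() — end up using the same binding.)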
} pl_assert(params->push_constants_size <= gpu->limits.max_pushc_size); pl_assert(params->push_constants_size == PL_ALIGN2(params->push_constants_size, 4)); return gpu->impl->pass_create(gpu, params); } void pl_pass_destroy(const struct pl_gpu *gpu, const struct pl_pass **pass) { if (!*pass) return; gpu->impl->pass_destroy(gpu, *pass); *pass = NULL; } static bool pl_tex_params_compat(const struct pl_tex_params a, const struct pl_tex_params b) { return a.format == b.format && a.sampleable == b.sampleable && a.renderable == b.renderable && a.storable == b.storable && a.blit_src == b.blit_src && a.blit_dst == b.blit_dst && a.host_writable == b.host_writable && a.host_readable == b.host_readable && a.sample_mode == b.sample_mode && a.address_mode == b.address_mode; } void pl_pass_run(const struct pl_gpu *gpu, const struct pl_pass_run_params *params) { const struct pl_pass *pass = params->pass; struct pl_pass_run_params new = *params; for (int i = 0; i < pass->params.num_descriptors; i++) { struct pl_desc desc = pass->params.descriptors[i]; struct pl_desc_binding db = params->desc_bindings[i]; pl_assert(db.object); switch (desc.type) { case PL_DESC_SAMPLED_TEX: { const struct pl_tex *tex = db.object; pl_assert(tex->params.sampleable); break; } case PL_DESC_STORAGE_IMG: { const struct pl_tex *tex = db.object; pl_assert(tex->params.storable); break; } case PL_DESC_BUF_UNIFORM: { const struct pl_buf *buf = db.object; pl_assert(buf->params.type == PL_BUF_UNIFORM); break; } case PL_DESC_BUF_STORAGE: { const struct pl_buf *buf = db.object; pl_assert(buf->params.type == PL_BUF_STORAGE); break; } case PL_DESC_BUF_TEXEL_UNIFORM: { const struct pl_buf *buf = db.object; pl_assert(buf->params.type == PL_BUF_TEXEL_UNIFORM); break; } case PL_DESC_BUF_TEXEL_STORAGE: { const struct pl_buf *buf = db.object; pl_assert(buf->params.type == PL_BUF_TEXEL_STORAGE); break; } default: abort(); } } for (int i = 0; i < params->num_var_updates; i++) { struct pl_var_update vu = params->var_updates[i]; pl_assert(gpu->caps & PL_GPU_CAP_INPUT_VARIABLES); pl_assert(vu.index >= 0 && vu.index < pass->params.num_variables); pl_assert(vu.data); } pl_assert(params->push_constants || !pass->params.push_constants_size); switch (pass->params.type) { case PL_PASS_RASTER: { pl_assert(params->vertex_data); switch (pass->params.vertex_type) { case PL_PRIM_TRIANGLE_LIST: pl_assert(params->vertex_count % 3 == 0); // fall through case PL_PRIM_TRIANGLE_STRIP: case PL_PRIM_TRIANGLE_FAN: pl_assert(params->vertex_count >= 3); break; } const struct pl_tex *tex = params->target; pl_assert(tex); pl_assert(pl_tex_params_dimension(tex->params) == 2); pl_assert(pl_tex_params_compat(tex->params, pass->params.target_dummy.params)); pl_assert(tex->params.renderable); struct pl_rect2d *vp = &new.viewport; struct pl_rect2d *sc = &new.scissors; // Sanitize viewport/scissors if (!vp->x0 && !vp->x1) vp->x1 = tex->params.w; if (!vp->y0 && !vp->y1) vp->y1 = tex->params.h; if (!sc->x0 && !sc->x1) sc->x1 = tex->params.w; if (!sc->y0 && !sc->y1) sc->y1 = tex->params.h; // Constrain the scissors to the target dimension (to sanitize the // underlying graphics API calls) sc->x0 = PL_MAX(0, PL_MIN(tex->params.w, sc->x0)); sc->y0 = PL_MAX(0, PL_MIN(tex->params.h, sc->y0)); sc->x1 = PL_MAX(0, PL_MIN(tex->params.w, sc->x1)); sc->y1 = PL_MAX(0, PL_MIN(tex->params.h, sc->y1)); // Scissors wholly outside target -> silently drop pass (also needed // to ensure we don't cause UB by specifying invalid scissors) if (!pl_rect_w(*sc) || !pl_rect_h(*sc)) return; 
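        // (Added note: due to the defaulting above, a caller that leaves
        // params->viewport and params->scissors zero-initialized ends up
        // covering the full render target, i.e. {0, 0, tex->params.w,
        // tex->params.h}.)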
pl_assert(pl_rect_w(*vp) > 0); pl_assert(pl_rect_h(*vp) > 0); pl_assert(pl_rect_w(*sc) > 0); pl_assert(pl_rect_h(*sc) > 0); break; } case PL_PASS_COMPUTE: for (int i = 0; i < PL_ARRAY_SIZE(params->compute_groups); i++) { pl_assert(params->compute_groups[i] >= 0); pl_assert(params->compute_groups[i] <= gpu->limits.max_dispatch[i]); } break; default: abort(); } if (params->target && !pass->params.load_target) pl_tex_invalidate(gpu, params->target); return gpu->impl->pass_run(gpu, &new); } void pl_gpu_flush(const struct pl_gpu *gpu) { if (gpu->impl->gpu_flush) gpu->impl->gpu_flush(gpu); } // GPU-internal helpers struct pl_var_layout std140_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var) { size_t el_size = pl_var_type_size(var->type); // std140 packing rules: // 1. The size of generic values is their size in bytes // 2. The size of vectors is the vector length * the base count, with the // exception of *vec3 which is always the same size as *vec4 // 3. Matrices are treated like arrays of column vectors // 4. The size of array rows is that of the element size rounded up to // the nearest multiple of vec4 // 5. All values are aligned to a multiple of their size (stride for arrays) size_t size = el_size * var->dim_v; if (var->dim_v == 3) size += el_size; if (var->dim_m * var->dim_a > 1) size = PL_ALIGN2(size, sizeof(float[4])); return (struct pl_var_layout) { .offset = PL_ALIGN2(offset, size), .stride = size, .size = size * var->dim_m * var->dim_a, }; } struct pl_var_layout std430_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var) { size_t el_size = pl_var_type_size(var->type); // std430 packing rules: like std140, except arrays/matrices are always // "tightly" packed, even arrays/matrices of vec3s size_t size = el_size * var->dim_v; if (var->dim_v == 3 && var->dim_m == 1 && var->dim_a == 1) size += el_size; return (struct pl_var_layout) { .offset = PL_ALIGN2(offset, size), .stride = size, .size = size * var->dim_m * var->dim_a, }; } void pl_buf_pool_uninit(const struct pl_gpu *gpu, struct pl_buf_pool *pool) { for (int i = 0; i < pool->num_buffers; i++) pl_buf_destroy(gpu, &pool->buffers[i]); talloc_free(pool->buffers); *pool = (struct pl_buf_pool){0}; } static bool pl_buf_params_compatible(const struct pl_buf_params *new, const struct pl_buf_params *old) { return new->type == old->type && new->size <= old->size && new->host_mapped == old->host_mapped && new->host_writable == old->host_writable && new->host_readable == old->host_readable; } static bool pl_buf_pool_grow(const struct pl_gpu *gpu, struct pl_buf_pool *pool) { const struct pl_buf *buf = pl_buf_create(gpu, &pool->current_params); if (!buf) return false; TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers, pool->index, buf); PL_DEBUG(gpu, "Resized buffer pool of type %u to size %d", pool->current_params.type, pool->num_buffers); return true; } const struct pl_buf *pl_buf_pool_get(const struct pl_gpu *gpu, struct pl_buf_pool *pool, const struct pl_buf_params *params) { pl_assert(!params->initial_data); if (!pl_buf_params_compatible(params, &pool->current_params)) { pl_buf_pool_uninit(gpu, pool); pool->current_params = *params; } // Make sure we have at least one buffer available if (!pool->buffers && !pl_buf_pool_grow(gpu, pool)) return NULL; bool usable = !pl_buf_poll(gpu, pool->buffers[pool->index], 0); if (usable) goto done; if (pool->num_buffers < PL_BUF_POOL_MAX_BUFFERS) { if (pl_buf_pool_grow(gpu, pool)) goto done; // Failed growing the buffer pool, so just error out early return 
NULL; } // Can't resize any further, so just loop until the buffer is usable while (pl_buf_poll(gpu, pool->buffers[pool->index], 1000000000)) // 1s PL_TRACE(gpu, "Blocked on buffer pool availability! (slow path)"); done: ; const struct pl_buf *buf = pool->buffers[pool->index++]; pool->index %= pool->num_buffers; return buf; } bool pl_tex_upload_pbo(const struct pl_gpu *gpu, struct pl_buf_pool *pbo, const struct pl_tex_transfer_params *params) { if (params->buf) return pl_tex_upload(gpu, params); struct pl_buf_params bufparams = { .type = PL_BUF_TEX_TRANSFER, .size = pl_tex_transfer_size(params), .host_writable = true, }; const struct pl_buf *buf = pl_buf_pool_get(gpu, pbo, &bufparams); if (!buf) return false; pl_buf_write(gpu, buf, 0, params->ptr, bufparams.size); struct pl_tex_transfer_params newparams = *params; newparams.buf = buf; newparams.ptr = NULL; return pl_tex_upload(gpu, &newparams); } bool pl_tex_download_pbo(const struct pl_gpu *gpu, struct pl_buf_pool *pbo, const struct pl_tex_transfer_params *params) { if (params->buf) return pl_tex_download(gpu, params); struct pl_buf_params bufparams = { .type = PL_BUF_TEX_TRANSFER, .size = pl_tex_transfer_size(params), .host_readable = true, }; const struct pl_buf *buf = pl_buf_pool_get(gpu, pbo, &bufparams); if (!buf) return false; struct pl_tex_transfer_params newparams = *params; newparams.buf = buf; newparams.ptr = NULL; if (!pl_tex_download(gpu, &newparams)) return false; if (pl_buf_poll(gpu, buf, 0)) { PL_TRACE(gpu, "pl_tex_download without buffer: blocking (slow path)"); while (pl_buf_poll(gpu, buf, 1000000)) ; // 1 ms } return pl_buf_read(gpu, buf, 0, params->ptr, bufparams.size); } bool pl_tex_upload_texel(const struct pl_gpu *gpu, struct pl_dispatch *dp, const struct pl_tex_transfer_params *params) { const int threads = 256; const struct pl_tex *tex = params->tex; const struct pl_fmt *fmt = tex->params.format; pl_assert(params->buf); pl_assert(params->buf->params.type == PL_BUF_TEXEL_UNIFORM); pl_dispatch_reset_frame(dp); struct pl_shader *sh = pl_dispatch_begin(dp); if (!sh_try_compute(sh, threads, 1, true, 0)) { PL_ERR(gpu, "Failed emulating texture transfer!"); pl_dispatch_abort(dp, &sh); return false; } ident_t buf = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "data", .type = PL_DESC_BUF_TEXEL_UNIFORM, }, .object = params->buf, }); ident_t img = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "image", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_WRITEONLY, }, .object = params->tex, }); GLSL("vec4 color = vec4(0.0); \n" "ivec3 pos = ivec3(gl_GlobalInvocationID) + ivec3(%d, %d, %d); \n" "int base = ((pos.z * %d + pos.y) * %d + pos.x) * %d; \n", params->rc.x0, params->rc.y0, params->rc.z0, params->stride_h, params->stride_w, fmt->num_components); for (int i = 0; i < fmt->num_components; i++) GLSL("color[%d] = texelFetch(%s, base + %d).r; \n", i, buf, i); // If the transfer width is a natural multiple of the thread size, we // can skip the bounds check. 
Otherwise, make sure we aren't blitting out // of the range since this would violate semantics int groups_x = (pl_rect_w(params->rc) + threads - 1) / threads; bool is_crop = params->rc.x1 != params->tex->params.w; if (is_crop && groups_x * threads != pl_rect_w(params->rc)) GLSL("if (gl_GlobalInvocationID.x < %d)\n", pl_rect_w(params->rc)); int dims = pl_tex_params_dimension(tex->params); static const char *coord_types[] = { [1] = "int", [2] = "ivec2", [3] = "ivec3", }; GLSL("imageStore(%s, %s(pos), color);\n", img, coord_types[dims]); int groups[3] = { groups_x, pl_rect_h(params->rc), pl_rect_d(params->rc) }; return pl_dispatch_compute(dp, &sh, groups); } bool pl_tex_download_texel(const struct pl_gpu *gpu, struct pl_dispatch *dp, const struct pl_tex_transfer_params *params) { const int threads = 256; const struct pl_tex *tex = params->tex; const struct pl_fmt *fmt = tex->params.format; pl_assert(params->buf); pl_assert(params->buf->params.type == PL_BUF_TEXEL_STORAGE); pl_dispatch_reset_frame(dp); struct pl_shader *sh = pl_dispatch_begin(dp); if (!sh_try_compute(sh, threads, 1, true, 0)) { PL_ERR(gpu, "Failed emulating texture transfer!"); pl_dispatch_abort(dp, &sh); return false; } ident_t buf = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "data", .type = PL_DESC_BUF_TEXEL_STORAGE, }, .object = params->buf, }); ident_t img = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "image", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_READONLY, }, .object = params->tex, }); int dims = pl_tex_params_dimension(tex->params); static const char *coord_types[] = { [1] = "int", [2] = "ivec2", [3] = "ivec3", }; GLSL("ivec3 pos = ivec3(gl_GlobalInvocationID) + ivec3(%d, %d, %d); \n" "int base = ((pos.z * %d + pos.y) * %d + pos.x) * %d; \n" "vec4 color = imageLoad(%s, %s(pos)); \n", params->rc.x0, params->rc.y0, params->rc.z0, params->stride_h, params->stride_w, fmt->num_components, img, coord_types[dims]); int groups_x = (pl_rect_w(params->rc) + threads - 1) / threads; bool is_crop = params->rc.x1 != params->tex->params.w; if (is_crop && groups_x * threads != pl_rect_w(params->rc)) GLSL("if (gl_GlobalInvocationID.x < %d)\n", pl_rect_w(params->rc)); GLSL("{\n"); for (int i = 0; i < fmt->num_components; i++) GLSL("imageStore(%s, base + %d, vec4(color[%d])); \n", buf, i, i); GLSL("}\n"); int groups[3] = { groups_x, pl_rect_h(params->rc), pl_rect_d(params->rc) }; return pl_dispatch_compute(dp, &sh, groups); } struct pl_pass_params pl_pass_params_copy(void *tactx, const struct pl_pass_params *params) { struct pl_pass_params new = *params; new.target_dummy.priv = NULL; new.cached_program = NULL; new.cached_program_len = 0; new.glsl_shader = talloc_strdup(tactx, new.glsl_shader); new.vertex_shader = talloc_strdup(tactx, new.vertex_shader); if (new.blend_params) new.blend_params = talloc_ptrdup(tactx, new.blend_params); #define DUPSTRS(name, array, num) \ do { \ (array) = TARRAY_DUP(tactx, array, num); \ for (int j = 0; j < num; j++) \ (array)[j].name = talloc_strdup(tactx, (array)[j].name); \ } while (0) DUPSTRS(name, new.variables, new.num_variables); DUPSTRS(name, new.descriptors, new.num_descriptors); DUPSTRS(name, new.vertex_attribs, new.num_vertex_attribs); for (int i = 0; i < new.num_descriptors; i++) { struct pl_desc *desc = &new.descriptors[i]; DUPSTRS(var.name, desc->buffer_vars, desc->num_buffer_vars); } #undef DUPNAMES return new; } libplacebo-0.4.0/src/gpu.h000066400000000000000000000134021324021332500153350ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #define GPU_PFN(name) __typeof__(pl_##name) *name struct pl_gpu_fns { // Destructors: These also free the corresponding objects, but they // must not be called on NULL. (The NULL checks are done by the pl_*_destroy // wrappers) void (*destroy)(const struct pl_gpu *gpu); void (*tex_destroy)(const struct pl_gpu *, const struct pl_tex *); void (*buf_destroy)(const struct pl_gpu *, const struct pl_buf *); void (*pass_destroy)(const struct pl_gpu *, const struct pl_pass *); GPU_PFN(tex_create); GPU_PFN(tex_invalidate); GPU_PFN(tex_clear); GPU_PFN(tex_blit); GPU_PFN(tex_upload); GPU_PFN(tex_download); GPU_PFN(buf_create); GPU_PFN(buf_write); GPU_PFN(buf_read); GPU_PFN(buf_poll); // optional: if NULL buffers are always free to use GPU_PFN(desc_namespace); GPU_PFN(pass_create); GPU_PFN(pass_run); GPU_PFN(gpu_flush); // optional // The following functions are optional if the corresponding pl_limit // size restriction is 0 GPU_PFN(buf_uniform_layout); GPU_PFN(buf_storage_layout); GPU_PFN(push_constant_layout); }; #undef GPU_PFN // All resources such as textures and buffers allocated from the GPU must be // destroyed before calling pl_destroy. void pl_gpu_destroy(const struct pl_gpu *gpu); // Recreates a texture with new parameters, no-op if nothing changed bool pl_tex_recreate(const struct pl_gpu *gpu, const struct pl_tex **tex, const struct pl_tex_params *params); // Incrementally build up a buffer by adding new variable elements to the // buffer, resizing buf.buffer_vars if necessary. Returns whether or not the // variable could be successfully added (which may fail if you try exceeding // the size limits of the buffer type). If successful, the layout is stored // in *out_layout bool pl_buf_desc_append(void *tactx, const struct pl_gpu *gpu, struct pl_desc *buf_desc, struct pl_var_layout *out_layout, const struct pl_var new_var); size_t pl_buf_desc_size(const struct pl_desc *buf_desc); // GPU-internal helpers: these should not be used outside of GPU implementations // Log some metadata about the created GPU void pl_gpu_print_info(const struct pl_gpu *gpu, enum pl_log_level lev); // Sort the pl_format list into an optimal order. This tries to prefer formats // supporting more capabilities, while also trying to maintain a sane order // in terms of bit depth / component index. void pl_gpu_sort_formats(struct pl_gpu *gpu); // Pretty-print the format list void pl_gpu_print_formats(const struct pl_gpu *gpu, enum pl_log_level lev); // Look up the right GLSL image format qualifier from a partially filled-in // pl_fmt, or NULL if the format does not have a legal matching GLSL name. // // Warning: If `fmt->emulated` is true, this function makes the hard assumption // that 3-channel formats are being emulated as equivalent 4-channel formats! 
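//
// Illustrative example (added comment, not part of the original header): a
// non-opaque PL_FMT_UNORM format with four 8-bit components maps to "rgba8",
// and an `emulated` 3-component 8-bit UNORM format also maps to "rgba8",
// since such formats are assumed to be backed by a 4-component texture.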
const char *pl_fmt_glsl_format(const struct pl_fmt *fmt); // Compute the total size (in bytes) of a texture transfer operation size_t pl_tex_transfer_size(const struct pl_tex_transfer_params *par); // Layout rules for GLSL's packing modes struct pl_var_layout std140_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var); struct pl_var_layout std430_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var); // A hard-coded upper limit on a pl_buf_pool's size, to prevent OOM loops #define PL_BUF_POOL_MAX_BUFFERS 8 // A pool of buffers, which can grow as needed struct pl_buf_pool { struct pl_buf_params current_params; const struct pl_buf **buffers; int num_buffers; int index; }; void pl_buf_pool_uninit(const struct pl_gpu *gpu, struct pl_buf_pool *pool); // Note: params->initial_data is *not* supported const struct pl_buf *pl_buf_pool_get(const struct pl_gpu *gpu, struct pl_buf_pool *pool, const struct pl_buf_params *params); // Helper that wraps pl_tex_upload/download using texture upload buffers to // ensure that params->buf is always set. bool pl_tex_upload_pbo(const struct pl_gpu *gpu, struct pl_buf_pool *pbo, const struct pl_tex_transfer_params *params); bool pl_tex_download_pbo(const struct pl_gpu *gpu, struct pl_buf_pool *pbo, const struct pl_tex_transfer_params *params); // This requires that params.buf has been set and is of type PL_BUF_TEXEL_* bool pl_tex_upload_texel(const struct pl_gpu *gpu, struct pl_dispatch *dp, const struct pl_tex_transfer_params *params); bool pl_tex_download_texel(const struct pl_gpu *gpu, struct pl_dispatch *dp, const struct pl_tex_transfer_params *params); // Make a deep-copy of the pass params. Note: cached_program etc. are not // copied, but cleared explicitly. struct pl_pass_params pl_pass_params_copy(void *tactx, const struct pl_pass_params *params); libplacebo-0.4.0/src/include/000077500000000000000000000000001324021332500160145ustar00rootroot00000000000000libplacebo-0.4.0/src/include/libplacebo/000077500000000000000000000000001324021332500201105ustar00rootroot00000000000000libplacebo-0.4.0/src/include/libplacebo/colorspace.h000066400000000000000000000347541324021332500224300ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_COLORSPACE_H_ #define LIBPLACEBO_COLORSPACE_H_ #include #include // The underlying color representation (e.g. RGB, XYZ or YCbCr) enum pl_color_system { PL_COLOR_SYSTEM_UNKNOWN = 0, // YCbCr-like color systems: PL_COLOR_SYSTEM_BT_601, // ITU-R Rec. BT.601 (SD) PL_COLOR_SYSTEM_BT_709, // ITU-R Rec. BT.709 (HD) PL_COLOR_SYSTEM_SMPTE_240M, // SMPTE-240M PL_COLOR_SYSTEM_BT_2020_NC, // ITU-R Rec. BT.2020 (non-constant luminance) PL_COLOR_SYSTEM_BT_2020_C, // ITU-R Rec. 
BT.2020 (constant luminance) PL_COLOR_SYSTEM_YCGCO, // YCgCo (derived from RGB) // Other color systems: PL_COLOR_SYSTEM_RGB, // Red, Green and Blue PL_COLOR_SYSTEM_XYZ, // CIE 1931 XYZ, pre-encoded with gamma 2.6 PL_COLOR_SYSTEM_COUNT }; bool pl_color_system_is_ycbcr_like(enum pl_color_system sys); // Returns true for color systems that are linear transformations of the RGB // equivalent, i.e. are simple matrix multiplications. For color systems with // this property, pl_get_decoding_matrix is sufficient for conversion to RGB. bool pl_color_system_is_linear(enum pl_color_system sys); // Guesses the best YCbCr-like colorspace based on a image given resolution. // This only picks conservative values. (In particular, BT.2020 is never // auto-guessed, even for 4K resolution content) enum pl_color_system pl_color_system_guess_ycbcr(int width, int height); // The numerical range of the representation (where applicable). enum pl_color_levels { PL_COLOR_LEVELS_UNKNOWN = 0, PL_COLOR_LEVELS_TV, // TV range, e.g. 16-235 PL_COLOR_LEVELS_PC, // PC range, e.g. 0-255 PL_COLOR_LEVELS_COUNT, }; // The alpha representation mode. enum pl_alpha_mode { PL_ALPHA_UNKNOWN = 0, // or no alpha channel present PL_ALPHA_INDEPENDENT, // alpha channel is separate from the video PL_ALPHA_PREMULTIPLIED, // alpha channel is multiplied into the colors }; // The underlying bit-wise representation of a color sample. For example, // a 10-bit TV-range YCbCr value uploaded to a 16 bit texture would have // sample_depth=16 color_depth=10 bit_shift=0. // // For another example, a 12-bit XYZ full range sample shifted to 16-bits with // the lower 4 bits all set to 0 would have sample_depth=16 color_depth=12 // bit_shift=4. (libavcodec likes outputting this type of `xyz12`) // // To explain the meaning of `sample_depth` further; the consideration factor // here is the fact that GPU sampling will normalized the sampled color to the // range 0.0 - 1.0 in a manner dependent on the number of bits in the texture // format. So if you upload a 10-bit YCbCr value unpadded as 16-bit color // samples, all of the sampled values will be extremely close to 0.0. In such a // case, `pl_color_repr_normalize` would return a high scaling factor, which // would pull the color up to their 16-bit range. struct pl_bit_encoding { int sample_depth; // the number of bits the color is stored/sampled as int color_depth; // the effective number of bits of the color information int bit_shift; // a representational bit shift applied to the color }; // Returns whether two bit encodings are exactly identical. bool pl_bit_encoding_equal(const struct pl_bit_encoding *b1, const struct pl_bit_encoding *b2); // Struct describing the underlying color system and representation. This // information is needed to convert an encoded color to a normalized RGB triple // in the range 0-1. struct pl_color_repr { enum pl_color_system sys; enum pl_color_levels levels; enum pl_alpha_mode alpha; struct pl_bit_encoding bits; // or {0} if unknown }; // Some common color representations. 
It's worth pointing out that all of these // presets leave `alpha` and `bits` as unknown - that is, only the system and // levels are predefined extern const struct pl_color_repr pl_color_repr_unknown; extern const struct pl_color_repr pl_color_repr_rgb; extern const struct pl_color_repr pl_color_repr_sdtv; extern const struct pl_color_repr pl_color_repr_hdtv; // also Blu-ray extern const struct pl_color_repr pl_color_repr_uhdtv; // SDR, NCL system extern const struct pl_color_repr pl_color_repr_jpeg; // Returns whether two colorspace representations are exactly identical. bool pl_color_repr_equal(const struct pl_color_repr *c1, const struct pl_color_repr *c2); // Replaces unknown values in the first struct by those of the second struct. void pl_color_repr_merge(struct pl_color_repr *orig, const struct pl_color_repr *new); // This function normalizes the color representation such that // color_depth=sample_depth and bit_shift=0; and returns the scaling factor // that must be multiplied into the color value to accomplish this, assuming // it has already been sampled by the GPU. If unknown, the color and sample // depth will both be inferred as 8 bits for the purposes of this conversion. float pl_color_repr_normalize(struct pl_color_repr *repr); // The colorspace's primaries (gamut) enum pl_color_primaries { PL_COLOR_PRIM_UNKNOWN = 0, // Standard gamut: PL_COLOR_PRIM_BT_601_525, // ITU-R Rec. BT.601 (525-line = NTSC, SMPTE-C) PL_COLOR_PRIM_BT_601_625, // ITU-R Rec. BT.601 (625-line = PAL, SECAM) PL_COLOR_PRIM_BT_709, // ITU-R Rec. BT.709 (HD), also sRGB PL_COLOR_PRIM_BT_470M, // ITU-R Rec. BT.470 M // Wide gamut: PL_COLOR_PRIM_BT_2020, // ITU-R Rec. BT.2020 (UltraHD) PL_COLOR_PRIM_APPLE, // Apple RGB PL_COLOR_PRIM_ADOBE, // Adobe RGB (1998) PL_COLOR_PRIM_PRO_PHOTO, // ProPhoto RGB (ROMM) PL_COLOR_PRIM_CIE_1931, // CIE 1931 RGB primaries PL_COLOR_PRIM_DCI_P3, // DCI-P3 (Digital Cinema) PL_COLOR_PRIM_V_GAMUT, // Panasonic V-Gamut (VARICAM) PL_COLOR_PRIM_S_GAMUT, // Sony S-Gamut PL_COLOR_PRIM_COUNT }; bool pl_color_primaries_is_wide_gamut(enum pl_color_primaries prim); // Guesses the best primaries based on a resolution. This always guesses // conservatively, i.e. it will never return a wide gamut color space even if // the resolution is 4K. enum pl_color_primaries pl_color_primaries_guess(int width, int height); // The colorspace's transfer function (gamma / EOTF) enum pl_color_transfer { PL_COLOR_TRC_UNKNOWN = 0, // Standard dynamic range: PL_COLOR_TRC_BT_1886, // ITU-R Rec. BT.1886 (CRT emulation + OOTF) PL_COLOR_TRC_SRGB, // IEC 61966-2-4 sRGB (CRT emulation) PL_COLOR_TRC_LINEAR, // Linear light content PL_COLOR_TRC_GAMMA18, // Pure power gamma 1.8 PL_COLOR_TRC_GAMMA22, // Pure power gamma 2.2 PL_COLOR_TRC_GAMMA28, // Pure power gamma 2.8 PL_COLOR_TRC_PRO_PHOTO, // ProPhoto RGB (ROMM) // High dynamic range: PL_COLOR_TRC_PQ, // ITU-R BT.2100 PQ (perceptual quantizer), aka SMPTE ST2048 PL_COLOR_TRC_HLG, // ITU-R BT.2100 HLG (hybrid log-gamma), aka ARIB STD-B67 PL_COLOR_TRC_V_LOG, // Panasonic V-Log (VARICAM) PL_COLOR_TRC_S_LOG1, // Sony S-Log1 PL_COLOR_TRC_S_LOG2, // Sony S-Log2 PL_COLOR_TRC_COUNT }; // Returns the nominal peak of a given transfer function, relative to the // reference white. This refers to the highest encodable signal level. // Always equal to 1.0 for SDR curves. 
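// (Added example for illustration: PQ is specified up to 10000 cd/m^2, so
// with PL_COLOR_REF_WHITE = 100.0 its nominal peak works out to 100.0
// relative to the reference white; the values returned by this function are
// authoritative.)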
float pl_color_transfer_nominal_peak(enum pl_color_transfer trc); static inline bool pl_color_transfer_is_hdr(enum pl_color_transfer trc) { return pl_color_transfer_nominal_peak(trc) > 1.0; } // This defines the standard reference white level (in cd/m^2) that is assumed // throughout standards such as those from by ITU-R, EBU, etc. // This is particularly relevant for HDR conversions, as this value is used // as a reference for conversions between absolute transfer curves (e.g. PQ) // and relative transfer curves (e.g. SDR, HLG). #define PL_COLOR_REF_WHITE 100.0 // The semantic interpretation of the decoded image, how is it mastered? enum pl_color_light { PL_COLOR_LIGHT_UNKNOWN = 0, PL_COLOR_LIGHT_DISPLAY, // Display-referred, output as-is PL_COLOR_LIGHT_SCENE_HLG, // Scene-referred, HLG OOTF PL_COLOR_LIGHT_SCENE_709_1886, // Scene-referred, OOTF = BT.709+1886 interaction PL_COLOR_LIGHT_SCENE_1_2, // Scene-referred, OOTF = gamma 1.2 PL_COLOR_LIGHT_COUNT }; bool pl_color_light_is_scene_referred(enum pl_color_light light); // Rendering intent for colorspace transformations. These constants match the // ICC specification (Table 23) enum pl_rendering_intent { PL_INTENT_PERCEPTUAL = 0, PL_INTENT_RELATIVE_COLORIMETRIC = 1, PL_INTENT_SATURATION = 2, PL_INTENT_ABSOLUTE_COLORIMETRIC = 3 }; // Struct describing a physical color space. This information is needed to // turn a normalized RGB triple into its physical meaning, as well as to convert // between color spaces. struct pl_color_space { enum pl_color_primaries primaries; enum pl_color_transfer transfer; enum pl_color_light light; // The highest value that occurs in the signal, relative to the reference // white. (0 = unknown) float sig_peak; // The average light level that occurs in the signal, relative to the // reference white. (0 = unknown) float sig_avg; }; // Replaces unknown values in the first struct by those of the second struct. void pl_color_space_merge(struct pl_color_space *orig, const struct pl_color_space *new); // Returns whether two colorspaces are exactly identical. bool pl_color_space_equal(struct pl_color_space c1, struct pl_color_space c2); // Some common color spaces extern const struct pl_color_space pl_color_space_unknown; extern const struct pl_color_space pl_color_space_srgb; extern const struct pl_color_space pl_color_space_bt709; extern const struct pl_color_space pl_color_space_hdr10; extern const struct pl_color_space pl_color_space_bt2020_hlg; extern const struct pl_color_space pl_color_space_monitor; // typical display // This represents metadata about extra operations to perform during colorspace // conversion, which correspond to artistic adjustments of the color. struct pl_color_adjustment { // Brightness boost. 0.0 = neutral, 1.0 = solid white, -1.0 = solid black float brightness; // Contrast boost. 1.0 = neutral, 0.0 = solid black float contrast; // Saturation gain. 1.0 = neutral, 0.0 = grayscale float saturation; // Hue shift, corresponding to a rotation around the [U, V] subvector. // Only meaningful for YCbCr-like colorspaces. 0.0 = neutral float hue; // Gamma adjustment. 1.0 = neutral, 0.0 = solid black float gamma; }; // A struct pre-filled with all-neutral values. extern const struct pl_color_adjustment pl_color_adjustment_neutral; // Represents the chroma placement with respect to the luma samples. This is // only relevant for YCbCr-like colorspaces with chroma subsampling. 
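// (Added example for illustration: with 4:2:0 subsampling and MPEG-2/H.264
// style "left" siting, the chroma sample sits half a luma pixel to the left
// of the corresponding luma sample, which pl_chroma_location_offset would
// report as an x offset of -0.5; the implementation is authoritative.)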
enum pl_chroma_location { PL_CHROMA_UNKNOWN = 0, PL_CHROMA_LEFT, // MPEG2/4, H.264 PL_CHROMA_CENTER, // MPEG1, JPEG PL_CHROMA_COUNT, }; // Fills *x and *y with the offset in luma pixels corresponding to a given // chroma location. void pl_chroma_location_offset(enum pl_chroma_location loc, float *x, float *y); // Represents a single CIE xy coordinate (e.g. CIE Yxy with Y = 1.0) struct pl_cie_xy { float x, y; }; // Recovers (X / Y) from a CIE xy value. static inline float pl_cie_X(struct pl_cie_xy xy) { return xy.x / xy.y; } // Recovers (Z / Y) from a CIE xy value. static inline float pl_cie_Z(struct pl_cie_xy xy) { return (1 - xy.x - xy.y) / xy.y; } // Represents the raw physical primaries corresponding to a color space. struct pl_raw_primaries { struct pl_cie_xy red, green, blue, white; }; // Returns the raw primaries for a given color space. const struct pl_raw_primaries *pl_raw_primaries_get(enum pl_color_primaries prim); // Returns an RGB->XYZ conversion matrix for a given set of primaries. // Multiplying this into the RGB color transforms it to CIE XYZ, centered // around the color space's white point. struct pl_matrix3x3 pl_get_rgb2xyz_matrix(const struct pl_raw_primaries *prim); // Similar to pl_get_rgb2xyz_matrix, but gives the inverse transformation. struct pl_matrix3x3 pl_get_xyz2rgb_matrix(const struct pl_raw_primaries *prim); // Returns a primary adaptation matrix, which converts from one set of // primaries to another. This is an RGB->RGB transformation. For rendering // intents other than PL_INTENT_ABSOLUTE_COLORIMETRIC, the white point is // adapted using the Bradford matrix. struct pl_matrix3x3 pl_get_color_mapping_matrix(const struct pl_raw_primaries *src, const struct pl_raw_primaries *dst, enum pl_rendering_intent intent); // Returns a color decoding matrix for a given combination of source color // representation and adjustment parameters. This mutates the color_repr to // reflect the change. If `params` is left as NULL, it defaults to // &pl_color_adjustment_neutral. // // This function always performs a conversion to RGB; conversions from // arbitrary color representations to other arbitrary other color // representations are currently not supported. Not all color systems support // all of the color adjustment parameters. (In particular, hue/sat adjustments // are currently only supported for YCbCr-like color systems) // // Note: For BT.2020 constant-luminance, this outputs chroma information in the // range [-0.5, 0.5]. Since the CL system conversion is non-linear, further // processing must be done by the caller. The channel order is CrYCb. // // Note: For XYZ system, the input/encoding gamma must be pre-applied by the // user, typically this has a value of 2.6. struct pl_transform3x3 pl_color_repr_decode(struct pl_color_repr *repr, const struct pl_color_adjustment *params); #endif // LIBPLACEBO_COLORSPACE_H_ libplacebo-0.4.0/src/include/libplacebo/common.h000066400000000000000000000077561324021332500215700ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_COMMON_H_ #define LIBPLACEBO_COMMON_H_ #include #include // Some common utility types. These are overloaded to support 2D, 3D and // integer/float variants. struct pl_rect2d { int x0, y0; int x1, y1; }; struct pl_rect3d { int x0, y0, z0; int x1, y1, z1; }; struct pl_rect2df { float x0, y0; float x1, y1; }; struct pl_rect3df { float x0, y0; float x1, y1; float z0, z1; }; // These macros will work for any of the above pl_rect variants (with enough // dimensions). Careful: double-evaluation hazard #define pl_rect_w(r) ((r).x1 - (r).x0) #define pl_rect_h(r) ((r).y1 - (r).y0) #define pl_rect_d(r) ((r).z1 - (r).z0) #define pl_rect2d_eq(a, b) \ ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \ (a).y0 == (b).y0 && (a).y1 == (b).y1) #define pl_rect3d_eq(a, b) \ ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \ (a).y0 == (b).y0 && (a).y1 == (b).y1 && \ (a).z0 == (b).z0 && (a).z1 == (b).z1) // "Normalize" a rectangle: This ensures d1 >= d0 for all dimensions. void pl_rect2d_normalize(struct pl_rect2d *rc); void pl_rect3d_normalize(struct pl_rect3d *rc); // Represents a row-major matrix, i.e. the following matrix // [ a11 a12 a13 ] // [ a21 a22 a23 ] // [ a31 a32 a33 ] // is represented in C like this: // { { a11, a12, a13 }, // { a21, a22, a23 }, // { a31, a32, a33 } }; struct pl_matrix3x3 { float m[3][3]; }; extern const struct pl_matrix3x3 pl_matrix3x3_identity; // Applies a matrix to a float vector in-place. void pl_matrix3x3_apply(const struct pl_matrix3x3 *mat, float vec[3]); // Scales a color matrix by a linear factor. void pl_matrix3x3_scale(struct pl_matrix3x3 *mat, float scale); // Inverts a matrix. Only use where precision is not that important. void pl_matrix3x3_invert(struct pl_matrix3x3 *mat); // Composes/multiplies two matrices. Multiples B into A, i.e. // A := A * B void pl_matrix3x3_mul(struct pl_matrix3x3 *a, const struct pl_matrix3x3 *b); // Represents an affine transformation, which is basically a 3x3 matrix // together with a column vector to add onto the output. struct pl_transform3x3 { struct pl_matrix3x3 mat; float c[3]; }; extern const struct pl_transform3x3 pl_transform3x3_identity; // Applies a transform to a float vector in-place. void pl_transform3x3_apply(const struct pl_transform3x3 *t, float vec[3]); // Scales the output of a transform by a linear factor. Since an affine // transformation is non-linear, this does not commute. If you want to scale // the *input* of a transform, use pl_matrix3x3_scale on `t.mat`. void pl_transform3x3_scale(struct pl_transform3x3 *t, float scale); // Inverts a transform. Only use where precision is not that important. void pl_transform3x3_invert(struct pl_transform3x3 *t); // 2D analog of the above structs. Since these are featured less prominently, // we omit some of the other helper functions. struct pl_matrix2x2 { float m[2][2]; }; extern const struct pl_matrix2x2 pl_matrix2x2_identity; void pl_matrix2x2_apply(const struct pl_matrix2x2 *mat, float vec[2]); struct pl_transform2x2 { struct pl_matrix2x2 mat; float c[2]; }; extern const struct pl_transform2x2 pl_transform2x2_identity; void pl_transform2x2_apply(const struct pl_transform2x2 *t, float vec[2]); #endif // LIBPLACEBO_COMMON_H_ libplacebo-0.4.0/src/include/libplacebo/context.h000066400000000000000000000076731324021332500217620ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. */ #ifndef LIBPLACEBO_CONTEXT_H_ #define LIBPLACEBO_CONTEXT_H_ #include "config.h" // Meta-object to serve as a global entrypoint for the purposes of resource // allocation, logging, etc. Note on thread safety: the pl_context and // everything allocated from it are *not* thread-safe except where otherwise // noted. That is, multiple pl_context objects are safe to use from multiple // threads, but a single pl_context and all of its derived resources and // contexts must be used from a single thread at all times. struct pl_context; // The log level associated with a given log message. enum pl_log_level { PL_LOG_NONE = 0, PL_LOG_FATAL, // results in total loss of function of a major component PL_LOG_ERR, // serious error; may result in degraded function PL_LOG_WARN, // warning; potentially bad, probably user-relevant PL_LOG_INFO, // informational message, also potentially harmless errors PL_LOG_DEBUG, // verbose debug message, informational PL_LOG_TRACE, // very noisy trace of activity, usually benign PL_LOG_ALL = PL_LOG_TRACE, }; // Global options for a pl_context. struct pl_context_params { // Logging callback. All messages, informational or otherwise, will get // redirected to this callback. The logged messages do not include trailing // newlines. Optional. void (*log_cb)(void *log_priv, enum pl_log_level level, const char *msg); void *log_priv; // The current log level. Controls the level of messages that will be // redirected to the log callback. Setting this to PL_LOG_ALL means all // messages will be forwarded, but doing so indiscriminately can result // in increased CPU usage as it may enable extra debug paths based on the // configured log level. enum pl_log_level log_level; }; // Creates a new, blank pl_context. The argument `api_ver` must be given as // PL_API_VER (this is used to detect ABI mismatch due to broken linking). // `params` defaults to pl_context_default_params if left as NULL. // Returns NULL on failure. struct pl_context *pl_context_create(int api_ver, const struct pl_context_params *params); // Equal to (struct pl_context_params) {0} extern const struct pl_context_params pl_context_default_params; // Except where otherwise noted, all objects allocated from this pl_context // must be destroyed by the user before the pl_context is destroyed. // // Note: As a rule of thumb, all _destroy functions take the pointer to the // object to free as their parameter. This pointer is overwritten by NULL // afterwards. Calling a _destroy function on &{NULL} is valid, but calling it // on NULL itself is invalid. void pl_context_destroy(struct pl_context **ctx); // Two simple, stream-based loggers. You can use these as the log_cb. If you // also set log_priv to a FILE* (e.g. stdout or stderr) it will be printed // there; otherwise, it will be printed to stdout or stderr depending on the // log level. (A brief usage sketch follows below.)
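// A minimal creation sketch using these loggers, assuming the public
// <libplacebo/context.h> include path; the helper name is hypothetical.
#include <stdio.h>
#include <libplacebo/context.h>

static struct pl_context *create_logged_context(void)
{
    return pl_context_create(PL_API_VER, &(struct pl_context_params) {
        .log_cb    = pl_log_color,  // or pl_log_simple for plain prefixes
        .log_priv  = stderr,
        .log_level = PL_LOG_DEBUG,
    });
}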
// // The version with colors will use ANSI escape sequences to indicate the log // level. The version without will use explicit prefixes. void pl_log_simple(void *stream, enum pl_log_level level, const char *msg); void pl_log_color(void *stream, enum pl_log_level level, const char *msg); #endif // LIBPLACEBO_CONTEXT_H_ libplacebo-0.4.0/src/include/libplacebo/dispatch.h000066400000000000000000000064461324021332500220720ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DISPATCH_H_ #define LIBPLACEBO_DISPATCH_H_ #include #include struct pl_dispatch; // Creates a new shader dispatch object. This object provides a translation // layer between generated shaders (pl_shader) and the ra context such that it // can be used to execute shaders. This dispatch object will also provide // shader caching (for efficient re-use). struct pl_dispatch *pl_dispatch_create(struct pl_context *ctx, const struct pl_gpu *gpu); void pl_dispatch_destroy(struct pl_dispatch **dp); // Returns a blank pl_shader object, suitable for recording rendering commands. // For more information, see the header documentation in `shaders/*.h`. The // generated shaders always have unique identifiers, and can therefore be // safely merged together. struct pl_shader *pl_dispatch_begin(struct pl_dispatch *dp); // Dispatch a generated shader (via the pl_shader mechanism). The results of // shader execution will be rendered to `target`. Returns whether or not the // dispatch was successful. This operation will take over ownership of the // pl_shader passed to it, and return it back to the internal pool. // If `rc` is NULL, renders to the entire texture. // If set, `blend_params` enables and controls blending for this pass. bool pl_dispatch_finish(struct pl_dispatch *dp, struct pl_shader **sh, const struct pl_tex *target, const struct pl_rect2d *rc, const struct pl_blend_params *blend_params); // A variant of `pl_dispatch_finish`, this one only dispatches a compute shader // that has no output. // // Note: There is currently no way to actually construct such a shader with the // currently available public APIs. (However, it's still used internally, and // may be needed in the future) bool pl_dispatch_compute(struct pl_dispatch *dp, struct pl_shader **sh, int dispatch_size[3]); // Cancel an active shader without submitting anything. Useful, for example, // if the shader was instead merged into a different shader. void pl_dispatch_abort(struct pl_dispatch *dp, struct pl_shader **sh); // Reset/increments the internal counters of the pl_dispatch. This should be // called whenever the user is going to begin with a new frame, in order to // ensure that the "same" calls to pl_dispatch_begin end up creating shaders // with the same identifier. Failing to follow this rule means shader caching, // as well as features such as temporal dithering, will not work correctly. 
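// A minimal per-frame sketch, assuming the public <libplacebo/dispatch.h>
// include path; `fill_shader` is a hypothetical callback standing in for the
// pl_shader_* recording helpers, and `fbo` for a renderable target texture.
#include <libplacebo/dispatch.h>

static bool render_one_frame(struct pl_dispatch *dp, const struct pl_tex *fbo,
                             void (*fill_shader)(struct pl_shader *sh))
{
    pl_dispatch_reset_frame(dp); // keep shader identifiers stable across frames

    struct pl_shader *sh = pl_dispatch_begin(dp);
    fill_shader(sh);

    // Render to the whole target, without blending
    return pl_dispatch_finish(dp, &sh, fbo, NULL, NULL);
}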
void pl_dispatch_reset_frame(struct pl_dispatch *dp); #endif // LIBPLACEBO_DISPATCH_H libplacebo-0.4.0/src/include/libplacebo/dither.h000066400000000000000000000027371324021332500215510ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DITHER_H_ #define LIBPLACEBO_DITHER_H_ // Generates a deterministic NxN bayer (ordered) dither matrix, storing the // result in `data`. `size` must be a power of two. The resulting matrix will // be roughly uniformly distributed within the range [0,1). void pl_generate_bayer_matrix(float *data, int size); // Generates a random NxN blue noise texture. storing the result in `data`. // `size` must be a positive power of two no larger than 256. The resulting // texture will be roughly uniformly distributed within the range [0,1). // // Note: This function is very, *very* slow for large sizes. Generating a // dither matrix with size 256 can take several seconds on a modern processor. void pl_generate_blue_noise(float *data, int size); #endif // LIBPLACEBO_DITHER_H_ libplacebo-0.4.0/src/include/libplacebo/filters.h000066400000000000000000000353051324021332500217370ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_FILTER_KERNELS_H_ #define LIBPLACEBO_FILTER_KERNELS_H_ #include #include #define PL_FILTER_MAX_PARAMS 2 // Represents a single filter function, i.e. kernel or windowing function. // To invoke a filter with a different configuration than the default, you can // make a copy of this struct and modify the non-const fields before passing it // to pl_filter_initialize. struct pl_filter_function { // These bools indicate whether or not `radius` and `params` may be // modified by the user. bool resizable; bool tunable[PL_FILTER_MAX_PARAMS]; // The underlying filter function itself: Computes the weight as a function // of the offset. All filter functions must be normalized such that x=0 is // the center point, and in particular weight(0) = 1.0. The functions may // be undefined for values of x outside [0, radius]. double (*weight)(const struct pl_filter_function *k, double x); // This field may be used to adjust the function's radius. Defaults to the // the radius needed to represent a single filter lobe (tap). 
If the // function is not resizable, this field must not be modified - otherwise // the result of filter evaluation is undefined. float radius; // These fields may be used to adjust the function. Defaults to the // function's preferred defaults. If the relevant setting is not tunable, // they are ignored entirely. float params[PL_FILTER_MAX_PARAMS]; }; bool pl_filter_function_eq(const struct pl_filter_function *a, const struct pl_filter_function *b); // Box filter: Entirely 1.0 within the radius, entirely 0.0 outside of it. // This is also sometimes called a Dirichlet window. extern const struct pl_filter_function pl_filter_function_box; // Triangle filter: Linear transitions from 1.0 at x=0 to 0.0 at x=radius. // This is also sometimes called a Bartlett window. extern const struct pl_filter_function pl_filter_function_triangle; // Hann function: Cosine filter named after Julius von Hann. Also commonly // mislabeled as a "Hanning" function, due to its similarity to the Hamming // function. extern const struct pl_filter_function pl_filter_function_hann; // Hamming function: Cosine filter named after Richard Hamming. extern const struct pl_filter_function pl_filter_function_hamming; // Welch filter: Polynomial function consisting of a single parabolic section. extern const struct pl_filter_function pl_filter_function_welch; // Kaiser filter: Approximation of the DPSS window using Bessel functions. // Also sometimes called a Kaiser-Bessel window. // Parameter [0]: Shape (alpha). Determines the trade-off between the main lobe // and the side lobes. extern const struct pl_filter_function pl_filter_function_kaiser; // Blackman filter: Cosine filter named after Ralph Beebe Blackman. // Parameter [0]: Scale (alpha). Influences the shape. The defaults result in // zeros at the third and fourth sidelobes. extern const struct pl_filter_function pl_filter_function_blackman; // Gaussian function: Similar to the Gaussian distribution, this defines a // bell curve function. // Parameter [0]: Scale (t), increasing makes the result blurrier. extern const struct pl_filter_function pl_filter_function_gaussian; // Sinc function: Widely used for both kernels and windows, sinc(x) = sin(x)/x. extern const struct pl_filter_function pl_filter_function_sinc; // Jinc function: Similar to sinc, but extended to the 2D domain. Widely // used as the kernel of polar (EWA) filters. Also sometimes called a Sombrero // function. extern const struct pl_filter_function pl_filter_function_jinc; // Sphinx function: Similar to sinc and jinc, but extended to the 3D domain. // The name is derived from "spherical" sinc. Can be used to filter 3D signals // in theory. extern const struct pl_filter_function pl_filter_function_sphinx; // B/C-tunable Spline function: This is a family of commonly used spline // functions with two tunable parameters. Does not need to be windowed. // Parameter [0]: "B" // Parameter [1]: "C" // Due to its popularity, this function is available in several variants. // B = 0.0, C = 0.0: "base" bcspline, AKA Hermite spline (blocky) // B = 0.0, C = 0.5: Catmull-Rom filter (sharp) // B = 1/3, C = 1/3: Mitchell-Netravali filter (soft, doesn't ring) // B ≈ 0.37, C ≈ 0.31: Robidoux filter (used by ImageMagick) // B ≈ 0.26, C ≈ 0.37: RobidouxSharp filter.
(sharper variant of Robidoux) extern const struct pl_filter_function pl_filter_function_bcspline; extern const struct pl_filter_function pl_filter_function_catmull_rom; extern const struct pl_filter_function pl_filter_function_mitchell; extern const struct pl_filter_function pl_filter_function_robidoux; extern const struct pl_filter_function pl_filter_function_robidouxsharp; // Bicubic function: Very smooth and free of ringing, but very blurry. Does not // need to be windowed. extern const struct pl_filter_function pl_filter_function_bicubic; // Piecewise approximations of the Lanczos filter function (sinc-windowed // sinc). Referred to as "spline16", "spline36" and "spline64" mainly for // historical reasons, based on their fixed radii of 2, 3 and 4 (respectively). // These do not need to be windowed. extern const struct pl_filter_function pl_filter_function_spline16; extern const struct pl_filter_function pl_filter_function_spline36; extern const struct pl_filter_function pl_filter_function_spline64; struct pl_named_filter_function { const char *name; const struct pl_filter_function *function; }; // As a convenience, this contains a list of all supported filter function, // terminated by a single {0} entry. extern const struct pl_named_filter_function pl_named_filter_functions[]; // Returns a filter function with a given name, or NULL on failure. Safe to // call on name = NULL. const struct pl_named_filter_function *pl_find_named_filter_function(const char *name); // Represents a particular configuration/combination of filter functions to // form a filter. struct pl_filter_config { const struct pl_filter_function *kernel; // The kernel function const struct pl_filter_function *window; // The windowing function. Optional // Represents a clamping coefficient for negative weights. A value of 0.0 // (the default) represents no clamping. A value of 1.0 represents full // clamping, i.e. all negative weights will be clamped to 0. Values in // between will be linearly scaled. float clamp; // Additional blur coefficient. This effectively stretches the kernel, // without changing the effective radius of the filter radius. Setting this // to a value of 0.0 is equivalent to disabling it. Values significantly // below 1.0 may seriously degrade the visual output, and should be used // with care. float blur; // Additional taper coefficient. This essentially flattens the function's // center. The values within [-taper, taper] will return 1.0, with the // actual function being squished into the remainder of [taper, radius]. // Defaults to 0.0. float taper; // If true, this filter is intended to be used as a polar/2D filter (EWA) // instead of a separable/1D filter. Does not affect the actual sampling, // but provides information about how the results are to be interpreted. bool polar; }; bool pl_filter_config_eq(const struct pl_filter_config *a, const struct pl_filter_config *b); // Samples a given filter configuration at a given x coordinate, while // respecting all parameters of the configuration. double pl_filter_sample(const struct pl_filter_config *c, double x); // A list of built-in filter configurations. Since they are just combinations // of the above filter functions, they are not described in much further // detail. 
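// A small sketch of sampling a filter configuration directly via
// pl_filter_sample, assuming the public <libplacebo/filters.h> include path;
// the helper name and the hand-built config are illustrative only.
#include <stdio.h>
#include <libplacebo/filters.h>

static void dump_mitchell_weights(void)
{
    // Mitchell-Netravali kernel, no window, no clamp/blur/taper tweaks
    struct pl_filter_config cfg = {
        .kernel = &pl_filter_function_mitchell,
    };

    double radius = cfg.kernel->radius;
    for (int i = 0; i <= 8; i++) {
        double x = radius * i / 8.0;
        printf("w(%.3f) = %f\n", x, pl_filter_sample(&cfg, x));
    }
}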
extern const struct pl_filter_config pl_filter_spline16; // 2 taps extern const struct pl_filter_config pl_filter_spline36; // 3 taps extern const struct pl_filter_config pl_filter_spline64; // 4 taps extern const struct pl_filter_config pl_filter_box; // AKA nearest extern const struct pl_filter_config pl_filter_triangle; // AKA bilinear extern const struct pl_filter_config pl_filter_gaussian; // Sinc family (all configured to 3 taps): extern const struct pl_filter_config pl_filter_sinc; // unwindowed, extern const struct pl_filter_config pl_filter_lanczos; // sinc-sinc extern const struct pl_filter_config pl_filter_ginseng; // sinc-jinc extern const struct pl_filter_config pl_filter_ewa_jinc; // unwindowed extern const struct pl_filter_config pl_filter_ewa_lanczos; // jinc-jinc extern const struct pl_filter_config pl_filter_ewa_ginseng; // jinc-sinc extern const struct pl_filter_config pl_filter_ewa_hann; // jinc-hann extern const struct pl_filter_config pl_filter_haasnsoft; // blurred ewa_hann // Spline family extern const struct pl_filter_config pl_filter_bicubic; extern const struct pl_filter_config pl_filter_catmull_rom; extern const struct pl_filter_config pl_filter_mitchell; extern const struct pl_filter_config pl_filter_robidoux; extern const struct pl_filter_config pl_filter_robidouxsharp; extern const struct pl_filter_config pl_filter_ewa_robidoux; extern const struct pl_filter_config pl_filter_ewa_robidouxsharp; struct pl_named_filter_config { const char *name; const struct pl_filter_config *filter; }; // As a convenience, this contains a list of built-in filter configurations, // terminated by a single {0} entry. extern const struct pl_named_filter_config pl_named_filters[]; // Returns a filter config with a given name, or NULL on failure. Safe to call // on name = NULL. const struct pl_named_filter_config *pl_find_named_filter(const char *name); // Parameters for filter generation. struct pl_filter_params { // The particular filter configuration to be sampled. config.kernel must // be set to a valid pl_filter_function. struct pl_filter_config config; // The precision of the resulting LUT. A value of 64 should be fine for // most practical purposes, but higher or lower values may be justified // depending on the use case. This value must be set to something > 0. int lut_entries; // When set to values above 1.0, the filter will be computed at a size // larger than the radius would otherwise require, in order to prevent // aliasing when downscaling. In practice, this should be set to the // inverse of the scaling ratio, i.e. src_size / dst_size. float filter_scale; // --- polar filers only (config.polar) // As a micro-optimization, all samples below this cutoff value will be // ignored when updating the cutoff radius. Setting it to a value of 0.0 // disables this optimization. float cutoff; // --- separable filters only (!config.polar) // Indicates the maximum row size that is supported by the calling code, or // 0 for no limit. int max_row_size; // Indicates the row stride alignment. For some use cases (e.g. uploading // the weights as a texture), there are certain alignment requirements for // each row. The chosen row_size will always be a multiple of this value. // Specifying 0 indicates no alignment requirements. int row_stride_align; }; // Represents an initialized instance of a particular filter, with a // precomputed LUT. The interpretation of the LUT depends on the type of the // filter (polar or separable). 
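// A sketch of generating a LUT from one of the built-in configurations,
// assuming the public <libplacebo/filters.h> include path and an existing
// pl_context; the helper name and parameter choices are illustrative.
#include <libplacebo/filters.h>

static const struct pl_filter *make_ewa_lanczos_lut(struct pl_context *ctx)
{
    struct pl_filter_params params = {
        .config       = pl_filter_ewa_lanczos,
        .lut_entries  = 64,
        .filter_scale = 1.0,   // no downscaling compensation
        .cutoff       = 0.001, // ignore negligible polar samples
    };

    // Free the result with pl_filter_free() when no longer needed
    return pl_filter_generate(ctx, &params);
}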
struct pl_filter { // Deep copy of the parameters, for convenience. struct pl_filter_params params; // Contains the true radius of the computed filter. This may be // larger than `config.kernel->radius` depending on the `scale` passed to // pl_filter_generate. This is only relevant for polar filters, where it // affects the value range of *weights. float radius; // The computed look-up table (LUT). For polar filters, this is interpreted // as a 1D array with dimensions [lut_entries] containing the raw filter // samples on the scale [0, radius]. For separable (non-polar) filters, // this is interpreted as a 2D array with dimensions // [lut_entries][row_stride]. The inner rows contain the `row_size` samples // to convolve with the corresponding input pixels. The outer coordinate is // used to very the fractional offset (phase). So for example, if the // sample position to reconstruct is directly aligned with the source // texels, you would use the values from weights[0]. If the sample position // to reconstruct is exactly half-way between two source texels (180° out // of phase), you would use the values from weights[lut_entries/2]. const float *weights; // --- polar filters only (params.config.polar) // Contains the effective cut-off radius for this filter. Samples outside // of this cutoff radius may be discarded. Computed based on the `cutoff` // value specified at filter generation. Only relevant for polar filters // since skipping samples outside of the radius can be a significant // performance gain for EWA sampling. float radius_cutoff; // --- separable filters only (!params.config.polar) // The number of source texels to convolve over for each row. This value // will never exceed the given `max_row_size`. If the filter ends up // cut off because of this, the bool `insufficient` will be set to true. int row_size; bool insufficient; // The separation (in *weights) between each row of the filter. Always // a multiple of params.row_stride_align. int row_stride; }; // Generate (compute) a filter instance based on a given filter configuration. // The resulting pl_filter must be freed with `pl_filter_free` when no longer // needed. Returns NULL if filter generation fails due to invalid parameters // (i.e. missing a required parameter). // The resulting pl_filter is implicitly destroyed when the pl_context is // destroyed. const struct pl_filter *pl_filter_generate(struct pl_context *ctx, const struct pl_filter_params *params); void pl_filter_free(const struct pl_filter **filter); #endif // LIBPLACEBO_FILTER_KERNELS_H_ libplacebo-0.4.0/src/include/libplacebo/gpu.h000066400000000000000000001072211324021332500210570ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #ifndef LIBPLACEBO_GPU_H_ #define LIBPLACEBO_GPU_H_ #include #include #include #include // This file contains the definition of an API which is designed to abstract // away from platform-specific APIs like the various OpenGL variants, Direct3D // and Vulkan in a common way. It is a much more limited API than those APIs, // since it tries targetting a very small common subset of features that is // needed to implement libplacebo's rendering. // // NOTE: When speaking of "valid usage" or "must", invalid usage is assumed to // result in undefined behavior. (Typically, an error message is printed to // stderr and libplacebo aborts). So ensuring valid API usage by the API user is // absolutely crucial. If you want to be freed from this reponsibility, use the // higher level abstractions provided by libplacebo alongside gpu.h. // Structure which wraps metadata describing GLSL capabilities. struct pl_glsl_desc { int version; // GLSL version (e.g. 450), for #version bool gles; // GLSL ES semantics (ESSL) bool vulkan; // GL_KHR_vulkan_glsl semantics }; typedef uint64_t pl_gpu_caps; enum { PL_GPU_CAP_COMPUTE = 1 << 0, // supports compute shaders PL_GPU_CAP_PARALLEL_COMPUTE = 1 << 1, // supports multiple compute queues PL_GPU_CAP_INPUT_VARIABLES = 1 << 2, // supports shader input variables }; // Structure defining the physical limits of this GPU instance. If a limit is // given as 0, that means that feature is unsupported. struct pl_gpu_limits { int max_tex_1d_dim; // maximum width for a 1D texture int max_tex_2d_dim; // maximum width/height for a 2D texture (required) int max_tex_3d_dim; // maximum width/height/depth for a 3D texture size_t max_pushc_size; // maximum push_constants_size size_t max_xfer_size; // maximum size of a PL_BUF_TEX_TRANSFER size_t max_ubo_size; // maximum size of a PL_BUF_UNIFORM size_t max_ssbo_size; // maximum size of a PL_BUF_STORAGE int max_buffer_texels; // maximum texels in a PL_BUF_TEXEL_* int min_gather_offset; // minimum textureGatherOffset offset int max_gather_offset; // maximum textureGatherOffset offset // Compute shader limits. Always available (non-zero) if PL_GPU_CAP_COMPUTE set size_t max_shmem_size; // maximum compute shader shared memory size int max_group_threads; // maximum number of local threads per work group int max_group_size[3]; // maximum work group size per dimension int max_dispatch[3]; // maximum dispatch size per dimension // These don't represent hard limits but indicate performance hints for // optimal alignment. For best performance, the corresponding field // should be aligned to a multiple of these. They will always be a power // of two. int align_tex_xfer_stride; // optimal pl_tex_transfer_params.stride_w/h size_t align_tex_xfer_offset; // optimal pl_tex_transfer_params.buf_offset }; // Abstract device context which wraps an underlying graphics context and can // be used to dispatch rendering commands. struct pl_gpu { struct pl_context *ctx; // the pl_context this GPU was initialized from struct pl_gpu_fns *impl; // the underlying implementation (unique per GPU) void *priv; pl_gpu_caps caps; // PL_GPU_CAP_* bit field struct pl_glsl_desc glsl; // GLSL version supported by this GPU struct pl_gpu_limits limits; // physical device limits // Note: Every GPU must support at least one of PL_GPU_CAP_INPUT_VARIABLES // or uniform buffers (limits.max_ubo_size > 0). // Supported texture formats, in preference order. 
(If there are multiple // similar formats, the "better" ones come first) const struct pl_fmt **formats; int num_formats; }; // Helper function to align the given dimension (e.g. width or height) to a // multiple of the optimal texture transfer stride. int pl_optimal_transfer_stride(const struct pl_gpu *gpu, int dimension); enum pl_fmt_type { PL_FMT_UNKNOWN = 0, // also used for inconsistent multi-component formats PL_FMT_UNORM, // unsigned, normalized integer format (sampled as float) PL_FMT_SNORM, // signed, normalized integer format (sampled as float) PL_FMT_UINT, // unsigned integer format (sampled as integer) PL_FMT_SINT, // signed integer format (sampled as integer) PL_FMT_FLOAT, // (signed) float formats, any bit size PL_FMT_TYPE_COUNT, }; enum pl_fmt_caps { PL_FMT_CAP_SAMPLEABLE = 1 << 0, // may be sampled from (PL_DESC_SAMPLED_TEX) PL_FMT_CAP_STORABLE = 1 << 1, // may be used as storage image (PL_DESC_STORAGE_IMG) PL_FMT_CAP_LINEAR = 1 << 2, // may be linearly samplied from (PL_TEX_SAMPLE_LINEAR) PL_FMT_CAP_RENDERABLE = 1 << 3, // may be rendered to (pl_pass_params.target_fmt) PL_FMT_CAP_BLENDABLE = 1 << 4, // may be blended to (pl_pass_params.enable_blend) PL_FMT_CAP_BLITTABLE = 1 << 5, // may be blitted from/to (pl_tex_blit) PL_FMT_CAP_VERTEX = 1 << 6, // may be used as a vertex attribute PL_FMT_CAP_TEXEL_UNIFORM = 1 << 7, // may be used as a texel uniform buffer PL_FMT_CAP_TEXEL_STORAGE = 1 << 8, // may be used as a texel storage buffer // Notes: // - PL_FMT_CAP_LINEAR also implies PL_FMT_CAP_SAMPLEABLE // - PL_FMT_CAP_STORABLE also implies PL_GPU_CAP_COMPUTE // - PL_FMT_CAP_VERTEX implies that the format is non-opaque }; // Structure describing a texel/vertex format. struct pl_fmt { const char *name; // symbolic name for this format (e.g. rgba32f) const void *priv; enum pl_fmt_type type; // the format's data type and interpretation enum pl_fmt_caps caps; // the features supported by this format int num_components; // number of components for this format int component_depth[4]; // meaningful bits per component, texture precision // This controls the relationship between the data as seen by the host and // the way it's interpreted by the texture. The host representation is // always tightly packed (no padding bits in between each component). // // If `opaque` is true, then there's no meaningful correspondence between // the two, and all of the remaining fields in this section are unset. // // If `emulated` is true, then this format doesn't actually exist on the // GPU as an uploadable texture format - and any apparent support is being // emulated (typically using compute shaders in the upload path). bool opaque; bool emulated; size_t texel_size; // total size in bytes per texel int host_bits[4]; // number of meaningful bits in host memory int sample_order[4]; // sampled index for each component, e.g. // {2, 1, 0, 3} for BGRA textures // If usable as a vertex or texel buffer format, this gives the GLSL type // corresponding to the data. (e.g. vec4) const char *glsl_type; // If usable as a storage image or texel storage buffer // (PL_FMT_CAP_STORABLE / PL_FMT_CAP_TEXEL_STORAGE), this gives the GLSL // texel format corresponding to the format. (e.g. rgba16ui) const char *glsl_format; }; // Returns whether or not a pl_fmt's components are ordered sequentially // in memory in the order RGBA. bool pl_fmt_is_ordered(const struct pl_fmt *fmt); // Helper function to find a format with a given number of components and // minimum effective precision per component. 
If `host_bits` is set, then the // format will always be non-opaque, unpadded, ordered and have exactly this // bit depth for each component. Finally, all `caps` must be supported. const struct pl_fmt *pl_find_fmt(const struct pl_gpu *gpu, enum pl_fmt_type type, int num_components, int min_depth, int host_bits, enum pl_fmt_caps caps); // Finds a vertex format for a given configuration. The resulting vertex will // have a component depth equivalent to the sizeof() of the equivalent host type. // (e.g. PL_FMT_FLOAT will always have sizeof(float)) const struct pl_fmt *pl_find_vertex_fmt(const struct pl_gpu *gpu, enum pl_fmt_type type, int num_components); // Find a format based on its name. const struct pl_fmt *pl_find_named_fmt(const struct pl_gpu *gpu, const char *name); enum pl_tex_sample_mode { PL_TEX_SAMPLE_NEAREST, // nearest neighbour sampling PL_TEX_SAMPLE_LINEAR, // linear filtering }; enum pl_tex_address_mode { PL_TEX_ADDRESS_CLAMP, // clamp the nearest edge texel PL_TEX_ADDRESS_REPEAT, // repeat (tile) the texture PL_TEX_ADDRESS_MIRROR, // repeat (mirror) the texture }; // Structure describing a texture. struct pl_tex_params { int w, h, d; // physical dimension; unused dimensions must be 0 const struct pl_fmt *format; // The following bools describe what operations can be performed. The // corresponding pl_fmt capability must be set for every enabled // operation type. bool sampleable; // usable as a PL_DESC_SAMPLED_TEX bool renderable; // usable as a render target (pl_pass_run) // (must only be used with 2D textures) bool storable; // usable as a storage image (PL_DESC_IMG_*) bool blit_src; // usable as a blit source bool blit_dst; // usable as a blit destination bool host_writable; // may be updated with pl_tex_upload() bool host_readable; // may be fetched with pl_tex_download() // The following capabilities are only relevant for textures which have // either sampleable or blit_src enabled. enum pl_tex_sample_mode sample_mode; enum pl_tex_address_mode address_mode; // If non-NULL, the texture will be created with these contents. Using // this does *not* require setting host_writable. Otherwise, the initial // data is undefined. const void *initial_data; }; static inline int pl_tex_params_dimension(const struct pl_tex_params params) { return params.d ? 3 : params.h ? 2 : 1; } // Conflates the following typical GPU API concepts: // - texture itself // - sampler state // - staging buffers for texture upload // - framebuffer objects // - wrappers for swapchain framebuffers // - synchronization needed for upload/rendering/etc. // // Essentially a pl_tex can be anything ranging from a normal texture, a wrapped // external/real framebuffer, a framebuffer object + texture pair, a mapped // texture (via pl_hwdec), or other sorts of things that can be sampled from // and/or rendered to. struct pl_tex { struct pl_tex_params params; void *priv; }; // Create a texture (with undefined contents). Returns NULL on failure. This is // assumed to be an expensive/rare operation, and may need to perform memory // allocation or framebuffer creation. const struct pl_tex *pl_tex_create(const struct pl_gpu *gpu, const struct pl_tex_params *params); void pl_tex_destroy(const struct pl_gpu *gpu, const struct pl_tex **tex); // Invalidates the contents of a texture. After this, the contents are fully // undefined. void pl_tex_invalidate(const struct pl_gpu *gpu, const struct pl_tex *tex);
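// A creation sketch combining pl_find_fmt and pl_tex_create, assuming the
// public <libplacebo/gpu.h> include path; the helper name is hypothetical,
// and real code should handle a NULL format/texture more gracefully.
#include <libplacebo/gpu.h>

static const struct pl_tex *create_rgba16_fbo(const struct pl_gpu *gpu,
                                              int w, int h)
{
    const struct pl_fmt *fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 4, 16, 0,
                                           PL_FMT_CAP_SAMPLEABLE |
                                           PL_FMT_CAP_LINEAR |
                                           PL_FMT_CAP_RENDERABLE);
    if (!fmt)
        return NULL;

    return pl_tex_create(gpu, &(struct pl_tex_params) {
        .w           = w,
        .h           = h,
        .format      = fmt,
        .sampleable  = true,
        .renderable  = true,
        .sample_mode = PL_TEX_SAMPLE_LINEAR,
    });
}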
// Clear the dst texture with the given color (rgba). This is functionally // identical to a blit operation, which means dst->params.blit_dst must be // set. void pl_tex_clear(const struct pl_gpu *gpu, const struct pl_tex *dst, const float color[4]); // Copy a sub-rectangle from one texture to another. The source/dest regions // must be within the texture bounds. Areas outside the dest region are // preserved. The formats of the textures must be loosely compatible - which // essentially means that they must have the same texel size. Additionally, // UINT textures can only be blitted to other UINT textures, and SINT textures // can only be blitted to other SINT textures. Finally, src.blit_src and // dst.blit_dst must be set, respectively. // // The rectangles may be "flipped", which leads to the image being flipped // while blitting. If the src and dst rects have different sizes, the source // image will be scaled according to src->params.sample_mode. That said, the // src and dst rects must be fully contained within the src/dst dimensions. void pl_tex_blit(const struct pl_gpu *gpu, const struct pl_tex *dst, const struct pl_tex *src, struct pl_rect3d dst_rc, struct pl_rect3d src_rc); // Structure describing a texture transfer operation. struct pl_tex_transfer_params { // Texture to transfer to/from. Depending on the type of the operation, // this must have params.host_writable (uploads) or params.host_readable // (downloads) set, respectively. const struct pl_tex *tex; // Note: Superfluous parameters are ignored, i.e. for a 1D texture, the y // and z fields of `rc`, as well as the corresponding strides, are ignored. // In all other cases, the stride must be >= the corresponding dimension // of `rc`, and the `rc` must be normalized and fully contained within the // image dimensions. If any of these parameters are left out (0), they // are inferred from the texture's size. struct pl_rect3d rc; // region of the texture to transfer unsigned int stride_w; // the number of texels per horizontal row (x axis) unsigned int stride_h; // the number of texels per vertical column (y axis) // For the data source/target of a transfer operation, there are two valid // options: // // 1. Transferring to/from a buffer: const struct pl_buf *buf; // buffer to use (type must be PL_BUF_TEX_TRANSFER) size_t buf_offset; // offset of data within buffer, must be a multiple of 4 // 2. Transferring to/from host memory directly: void *ptr; // address of data // The contents of the memory region / buffer must exactly match the // texture format; i.e. there is no explicit conversion between formats. // For data uploads, which are typically "fire and forget" operations, // the method used does not matter much; although uploading from a host // mapped buffer requires fewer memory copy operations and is therefore // advised when uploading large amounts of data frequently. // For data downloads, downloading directly to host memory is a blocking // operation and should therefore be avoided as much as possible. It's // highly recommended to always use a texture transfer buffer for texture // downloads if possible, which allows the transfer to happen // asynchronously. // When performing a texture transfer using a buffer, the buffer may be // marked as "in use" and should not be used for a different type of operation // until pl_buf_poll returns false. }; // Upload data to a texture. Returns whether successful. bool pl_tex_upload(const struct pl_gpu *gpu, const struct pl_tex_transfer_params *params); // Download data from a texture. Returns whether successful.
bool pl_tex_download(const struct pl_gpu *gpu, const struct pl_tex_transfer_params *params); // Buffer usage type. This restricts what types of operations may be performed // on a buffer. enum pl_buf_type { PL_BUF_INVALID = 0, PL_BUF_TEX_TRANSFER, // texture transfer buffer (for pl_tex_upload/download) PL_BUF_UNIFORM, // UBO, for PL_DESC_BUF_UNIFORM PL_BUF_STORAGE, // SSBO, for PL_DESC_BUF_STORAGE PL_BUF_TEXEL_UNIFORM,// texel buffer, for PL_DESC_BUF_TEXEL_UNIFORM PL_BUF_TEXEL_STORAGE,// texel buffer, for PL_DESC_BUF_TEXEL_STORAGE PL_BUF_PRIVATE, // GPU-private usage (interpretation arbitrary) PL_BUF_TYPE_COUNT, }; // Structure describing a buffer. struct pl_buf_params { enum pl_buf_type type; size_t size; // size in bytes bool host_mapped; // create a persistent, RW mapping (pl_buf.data) bool host_writable; // contents may be updated via pl_buf_write() bool host_readable; // contents may be read back via pl_buf_read() // For texel buffers (PL_BUF_TEXEL_*), this gives the interpretation of the // buffer's contents. `format->caps` must include the corresponding // PL_FMT_CAP_TEXEL_* for the texel buffer type in use. const struct pl_fmt *format; // If non-NULL, the buffer will be created with these contents. Otherwise, // the initial data is undefined. Using this does *not* require setting // host_writable. const void *initial_data; }; // A generic buffer, which can be used for multiple purposes (texture transfer, // storage buffer, uniform buffer, etc.) // // Note on efficiency: A pl_buf does not necessarily represent a true "buffer" // object on the underlying graphics API. It may also refer to a sub-slice of // a larger buffer, depending on the implementation details of the GPU. The // bottom line is that users do not need to worry about the efficiency of using // many small pl_buf objects. Having many small pl_bufs, even lots of few-byte // vertex buffers, is designed to be completely fine. struct pl_buf { struct pl_buf_params params; char *data; // for persistently mapped buffers, points to the first byte void *priv; }; // Create a buffer. The type of buffer depends on the parameters. The buffer // parameters must adhere to the restrictions imposed by the pl_gpu_limits. // Returns NULL on failure. const struct pl_buf *pl_buf_create(const struct pl_gpu *gpu, const struct pl_buf_params *params); void pl_buf_destroy(const struct pl_gpu *gpu, const struct pl_buf **buf); // Update the contents of a buffer, starting at a given offset (must be a // multiple of 4) and up to a given size, with the contents of *data. void pl_buf_write(const struct pl_gpu *gpu, const struct pl_buf *buf, size_t buf_offset, const void *data, size_t size); // Read back the contents of a buffer, starting at a given offset (must be a // multiple of 4) and up to a given size, storing the data into *dest. // Returns whether successful. bool pl_buf_read(const struct pl_gpu *gpu, const struct pl_buf *buf, size_t buf_offset, void *dest, size_t size); // Returns whether or not a buffer is currently "in use". This can either be // because of a pending read operation or because of a pending write operation. // Coalescing multiple types of the same access (e.g. uploading the same buffer // to multiple textures) is fine, but trying to read a buffer while it is being // written to or trying to write to a buffer while it is being read from will // almost surely result in graphical corruption. The GPU makes no attempt to // enforce this, it is up to the user to check and adhere to whatever // restrictions are necessary. 
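// An asynchronous download sketch following the advice above, assuming the
// public <libplacebo/gpu.h> include path; the helper name, the 1 ms poll
// interval and the minimal error handling are illustrative only.
#include <stdbool.h>
#include <stddef.h>
#include <libplacebo/gpu.h>

static bool download_via_buffer(const struct pl_gpu *gpu,
                                const struct pl_tex *tex,
                                void *dest, size_t size)
{
    const struct pl_buf *buf = pl_buf_create(gpu, &(struct pl_buf_params) {
        .type          = PL_BUF_TEX_TRANSFER,
        .size          = size,
        .host_readable = true,
    });
    if (!buf)
        return false;

    bool ok = pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
        .tex = tex,
        .buf = buf,
    });

    // Wait until the buffer is no longer in use before reading it back
    while (ok && pl_buf_poll(gpu, buf, 1000000 /* ns */))
        ; // still busy

    ok = ok && pl_buf_read(gpu, buf, 0, dest, size);
    pl_buf_destroy(gpu, &buf);
    return ok;
}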
// // The `timeout`, specified in nanoseconds, indicates how long to block for // before returning. If set to 0, this function will never block, and only // returns the current status of the buffer. The actual precision of the // timeout may be significantly longer than one nanosecond, and has no upper // bound. This function does not provide hard latency guarantees. // // Note: Destroying a buffer (pl_buf_destroy) is always valid, even if that // buffer is in use. bool pl_buf_poll(const struct pl_gpu *gpu, const struct pl_buf *buf, uint64_t timeout); // Data type of a shader input variable (e.g. uniform, or UBO member) enum pl_var_type { PL_VAR_INVALID = 0, PL_VAR_SINT, // C: int GLSL: int/ivec PL_VAR_UINT, // C: unsigned int GLSL: uint/uvec PL_VAR_FLOAT, // C: float GLSL: float/vec/mat PL_VAR_TYPE_COUNT }; // Returns the host size (in bytes) of a pl_var_type. size_t pl_var_type_size(enum pl_var_type type); // Represents a shader input variable (concrete data, e.g. vector, matrix) struct pl_var { const char *name; // name as used in the shader enum pl_var_type type; // The total number of values is given by dim_v * dim_m. For example, a // vec2 would have dim_v = 2 and dim_m = 1. A mat3x4 would have dim_v = 4 // and dim_m = 3. int dim_v; // vector dimension int dim_m; // matrix dimension (number of columns, see below) int dim_a; // array dimension }; // Returns a GLSL type name (e.g. vec4) for a given pl_var, or NULL if the // variable is not legal. Not that the array dimension is ignored, since the // array dimension is usually part of the variable name and not the type name. const char *pl_var_glsl_type_name(struct pl_var var); // Helper functions for constructing the most common pl_vars. struct pl_var pl_var_uint(const char *name); struct pl_var pl_var_float(const char *name); struct pl_var pl_var_vec2(const char *name); struct pl_var pl_var_vec3(const char *name); struct pl_var pl_var_vec4(const char *name); struct pl_var pl_var_mat2(const char *name); struct pl_var pl_var_mat3(const char *name); struct pl_var pl_var_mat4(const char *name); // Converts a pl_fmt to an "equivalent" pl_var. Equivalent in this sense means // that the pl_var's type will be the same as the vertex's sampled type (e.g. // PL_FMT_UNORM gets turned into PL_VAR_FLOAT). struct pl_var pl_var_from_fmt(const struct pl_fmt *fmt, const char *name); // Describes the memory layout of a variable, relative to some starting location // (typically the offset within a uniform/storage/pushconstant buffer) // // Note on matrices: All GPUs expect column major matrices, for both buffers and // input variables. Care needs to be taken to avoid trying to use e.g. a // pl_matrix3x3 (which is row major) directly as a pl_var_update.data! // // In terms of the host layout, a column-major matrix (e.g. matCxR) with C // columns and R rows is treated like an array vecR[C]. The `stride` here refers // to the separation between these array elements, i.e. the separation between // the individual columns. // // Visualization of a mat4x3: // // 0 1 2 3 <- columns // 0 [ (A) (D) (G) (J) ] // 1 [ (B) (E) (H) (K) ] // 2 [ (C) (F) (I) (L) ] // ^ rows // // Layout in GPU memory: (stride=16, size=60) // // [ A B C ] X <- column 0, offset +0 // [ D E F ] X <- column 1, offset +16 // [ G H I ] X <- column 2, offset +32 // [ J K L ] <- column 3, offset +48 // // Note the lack of padding on the last column in this example. 
// In general: size <= stride * dim_m // // C representation: (stride=12, size=48) // // { { A, B, C }, // { D, E, F }, // { G, H, I }, // { J, K, L } } // // Note on arrays: `stride` represents both the stride between elements of a // matrix, and the stride between elements of an array. That is, there is no // distinction between the columns of a matrix and the rows of an array. For // example, a mat2[10] and a vec2[20] share the same pl_var_layout - the stride // would be sizeof(vec2) and the size would be sizeof(vec2) * 2 * 10. struct pl_var_layout { size_t offset; // the starting offset of the first byte size_t stride; // the delta between two elements of an array/matrix size_t size; // the total size of the input }; // Returns the host layout of an input variable as required for a // tightly-packed, byte-aligned C data type, given a starting offset. struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var); // Returns the layout requirements of a uniform buffer element given a current // buffer offset. If limits.max_ubo_size is 0, then this function returns {0}. // // Note: In terms of the GLSL, this is always *specified* as std140 layout, but // because of the way GLSL gets translated to other APIs (notably D3D11), the // actual buffer contents may vary considerably from std140. As such, the // calling code should not make any assumptions about the buffer layout and // instead query the layout requirements explicitly using this function. // // The normal way to use this function is when calculating the size and offset // requirements of a uniform buffer in an incremental fashion, to calculate the // new offset of the next variable in this buffer. struct pl_var_layout pl_buf_uniform_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var); // Returns the layout requirements of a storage buffer element given a current // buffer offset. If limits.max_ssbo_size is 0, then this function returns {0}. // // Note: In terms of the GLSL, this is always *specified* as std430 layout, but // like with pl_buf_uniform_layout, the actual implementation may disagree. struct pl_var_layout pl_buf_storage_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var); // Returns the layout requirements of a push constant element given a current // push constant offset. If `gpu->limits.max_pushc_size` is 0, then this // function returns {0}. struct pl_var_layout pl_push_constant_layout(const struct pl_gpu *gpu, size_t offset, const struct pl_var *var); // Like memcpy, but copies bytes from `src` to `dst` in a manner governed by // the stride and size of `dst_layout` as well as `src_layout`. Also takes // into account the respective `offset`. void memcpy_layout(void *dst, struct pl_var_layout dst_layout, const void *src, struct pl_var_layout src_layout); // Represents a vertex attribute. struct pl_vertex_attrib { const char *name; // name as used in the shader const struct pl_fmt *fmt; // data format (must have PL_FMT_CAP_VERTEX) size_t offset; // byte offset into the vertex struct int location; // vertex location (as used in the shader) }; // Type of a shader input descriptor. 
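// A packing sketch using the layout helpers described above, assuming the
// public <libplacebo/gpu.h> include path; the variable name "positions" and
// the helper name are hypothetical. `src` is a tightly packed vec3[4] array,
// and `ubo_data` must provide at least `ubo.offset + ubo.size` bytes.
#include <libplacebo/gpu.h>

static void pack_vec3_array(const struct pl_gpu *gpu, void *ubo_data,
                            const float (*src)[3])
{
    struct pl_var var = pl_var_vec3("positions");
    var.dim_a = 4; // vec3 positions[4]

    struct pl_var_layout host = pl_var_host_layout(0, &var);
    struct pl_var_layout ubo  = pl_buf_uniform_layout(gpu, 0, &var);

    // Copies element by element, translating between the host stride
    // (sizeof(float[3])) and whatever stride the uniform buffer requires
    memcpy_layout(ubo_data, ubo, src, host);
}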
enum pl_desc_type { PL_DESC_INVALID = 0, PL_DESC_SAMPLED_TEX, // C: pl_tex* GLSL: combined texture sampler // (pl_tex->params.sampleable must be set) PL_DESC_STORAGE_IMG, // C: pl_tex* GLSL: storage image // (pl_tex->params.storable must be set) PL_DESC_BUF_UNIFORM, // C: pl_buf* GLSL: uniform buffer // (pl_buf->params.type must be PL_BUF_UNIFORM) PL_DESC_BUF_STORAGE, // C: pl_buf* GLSL: storage buffer // (pl_buf->params.type must be PL_BUF_STORAGE) PL_DESC_BUF_TEXEL_UNIFORM,// C: pl_buf* GLSL: uniform samplerBuffer // (pl_buf->params.type must be PL_BUF_TEXEL_UNIFORM) PL_DESC_BUF_TEXEL_STORAGE,// C: pl_buf* GLSL: uniform imageBuffer // (pl_buf->params.type must be PL_BUF_TEXEL_STORAGE) PL_DESC_TYPE_COUNT }; // Returns an abstract namespace index for a given descriptor type. This will // always be a value >= 0 and < PL_DESC_TYPE_COUNT. Implementations can use // this to figure out which descriptors may share the same value of `binding`. // Bindings must only be unique for all descriptors within the same namespace. int pl_desc_namespace(const struct pl_gpu *gpu, enum pl_desc_type type); // Access mode of a shader input descriptor. enum pl_desc_access { PL_DESC_ACCESS_READWRITE, PL_DESC_ACCESS_READONLY, PL_DESC_ACCESS_WRITEONLY, }; // Returns the GLSL syntax for a given access mode (e.g. "readonly"). const char *pl_desc_access_glsl_name(enum pl_desc_access mode); struct pl_buffer_var { struct pl_var var; struct pl_var_layout layout; }; // Represents a shader descriptor (e.g. texture or buffer binding) struct pl_desc { const char *name; // name as used in the shader enum pl_desc_type type; // The binding of this descriptor, as used in the shader. All bindings // within a namespace must be unique. (see: pl_desc_namespace) int binding; // For storage images and storage buffers, this can be used to restrict // the type of access that may be performed on the descriptor. Ignored for // the other descriptor types (uniform buffers and sampled textures are // always read-only). enum pl_desc_access access; // For PL_DESC_BUF_UNIFORM/STORAGE, this specifies the layout of the // variables contained by a buffer. Ignored for the other descriptor types struct pl_buffer_var *buffer_vars; int num_buffer_vars; }; // Framebuffer blending mode (for raster passes) enum pl_blend_mode { PL_BLEND_ZERO, PL_BLEND_ONE, PL_BLEND_SRC_ALPHA, PL_BLEND_ONE_MINUS_SRC_ALPHA, }; struct pl_blend_params { enum pl_blend_mode src_rgb; enum pl_blend_mode dst_rgb; enum pl_blend_mode src_alpha; enum pl_blend_mode dst_alpha; }; enum pl_prim_type { PL_PRIM_TRIANGLE_LIST, PL_PRIM_TRIANGLE_STRIP, PL_PRIM_TRIANGLE_FAN, }; enum pl_pass_type { PL_PASS_INVALID = 0, PL_PASS_RASTER, // vertex+fragment shader PL_PASS_COMPUTE, // compute shader (requires PL_GPU_CAP_COMPUTE) PL_PASS_TYPE_COUNT, }; // Description of a rendering pass. It conflates the following: // - GLSL shader(s) and its list of inputs // - target parameters (for raster passes) struct pl_pass_params { enum pl_pass_type type; // Input variables. Only supported if PL_GPU_CAP_INPUT_VARIABLES is set. // Otherwise, num_variables must be 0. struct pl_var *variables; int num_variables; // Input descriptors. (Always supported) struct pl_desc *descriptors; int num_descriptors; // Push constant region. Must be be a multiple of 4 <= limits.max_pushc_size size_t push_constants_size; // The shader text in GLSL. For PL_PASS_RASTER, this is interpreted // as a fragment shader. For PL_PASS_COMPUTE, this is interpreted as // a compute shader. 
const char *glsl_shader; // Highly implementation-specific byte array storing a compiled version of // the same shader. Can be used to speed up pass creation on already // known/cached shaders. // // Note: There are no restrictions on this. Passing an out-of-date cache, // passing a cache corresponding to a different progam, or passing a cache // belonging to a different GPU, are all valid. But obviously, in such cases, // there is no benefit in doing so. const uint8_t *cached_program; size_t cached_program_len; // --- type==PL_PASS_RASTER only // Describes the interpretation and layout of the vertex data. enum pl_prim_type vertex_type; struct pl_vertex_attrib *vertex_attribs; int num_vertex_attribs; size_t vertex_stride; // The vertex shader itself. const char *vertex_shader; // The target dummy texture this renderpass is intended to be used with. // This doesn't have to be a real texture - the caller can also pass a // blank pl_tex object, as long as target_dummy.params.format is set. The // format must support PL_FMT_CAP_RENDERABLE, and the target dummy must // have `renderable` enabled. // // If you pass a real texture here, the GPU backend may be able to optimize // the render pass better for the specific requirements of this texture. // This does not change the semantics of pl_pass_run, just perhaps the // performance. (The `priv` pointer will be cleared by pl_pass_create, so // there is no risk of a dangling reference) struct pl_tex target_dummy; // Target blending mode. If this is NULL, blending is disabled. Otherwise, // the `target_dummy.params.format` must have PL_FMT_CAP_BLENDABLE. const struct pl_blend_params *blend_params; // If false, the target's existing contents will be discarded before the // pass is run. (Semantically equivalent to calling pl_tex_invalidate // before every pl_pass_run, but slightly more efficient) bool load_target; }; // Conflates the following typical GPU API concepts: // - various kinds of shaders // - rendering pipelines // - descriptor sets, uniforms, other bindings // - all synchronization necessary // - the current values of all inputs struct pl_pass { struct pl_pass_params params; void *priv; }; // Compile a shader and create a render pass. This is a rare/expensive // operation and may take a significant amount of time, even if a cached // program is used. Returns NULL on failure. // // The resulting pl_pass->params.cached_program will be initialized by // this function to point to a new, valid cached program (if any). const struct pl_pass *pl_pass_create(const struct pl_gpu *gpu, const struct pl_pass_params *params); void pl_pass_destroy(const struct pl_gpu *gpu, const struct pl_pass **pass); struct pl_desc_binding { const void *object; // pl_* object with type corresponding to pl_desc_type }; struct pl_var_update { int index; // index into params.variables[] const void *data; // pointer to raw byte data corresponding to pl_var_host_layout() }; struct pl_pass_run_params { const struct pl_pass *pass; // This list only contains descriptors/variables which have changed // since the previous invocation. All non-mentioned variables implicitly // preserve their state from the last invocation. struct pl_var_update *var_updates; int num_var_updates; // This list contains all descriptors used by this pass. It must // always be filled, even if the descriptors haven't changed. The order // must match that of pass->params.descriptors struct pl_desc_binding *desc_bindings; // The push constants for this invocation. 
This must always be set and // fully defined for every invocation if params.push_constants_size > 0. void *push_constants; // --- pass->params.type==PL_PASS_RASTER only // Target must be a 2D texture, target->params.renderable must be true, and // target->params.format must match pass->params.target_fmt. If the viewport // or scissors are left blank, they are inferred from target->params. // // WARNING: Rendering to a *target that is being read from by the same // shader is undefined behavior. In general, trying to bind the same // resource multiple times to the same shader is undefined behavior. const struct pl_tex *target; struct pl_rect2d viewport; // screen space viewport (must be normalized) struct pl_rect2d scissors; // target render scissors (must be normalized) void *vertex_data; // raw pointer to vertex data int vertex_count; // number of vertices to render // --- pass->params.type==PL_PASS_COMPUTE only // Number of work groups to dispatch per dimension (X/Y/Z). Must be <= the // corresponding index of limits.max_dispatch int compute_groups[3]; }; // Execute a render pass. void pl_pass_run(const struct pl_gpu *gpu, const struct pl_pass_run_params *params); // This is semantically a no-op, but it provides a hint that you want to flush // any partially queued up commands and begin execution. There is normally no // need to call this, because queued commands will always be implicitly flushed // whenever necessary to make forward progress on commands like `pl_buf_poll`, // or when submitting a frame to a swapchain for display. In fact, calling this // function can negatively impact performance, because some GPUs rely on being // able to re-order and modify queued commands in order to enable optimizations // retroactively. // // The only time this might be beneficial to call explicitly is if you're doing // lots of offline rendering over a long period of time, and only fetching the // results (via pl_tex_download) at the very end. void pl_gpu_flush(const struct pl_gpu *gpu); #endif // LIBPLACEBO_GPU_H_ libplacebo-0.4.0/src/include/libplacebo/renderer.h000066400000000000000000000510451324021332500220740ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_RENDERER_H_ #define LIBPLACEBO_RENDERER_H_ #include #include #include #include #include #include struct pl_renderer; // Creates a new renderer object, which is backed by a GPU context. This is a // high-level object that takes care of the rendering chain as a whole, from // the source textures to the finished frame. struct pl_renderer *pl_renderer_create(struct pl_context *ctx, const struct pl_gpu *gpu); void pl_renderer_destroy(struct pl_renderer **rr); // Flushes the internal redraw cache of this renderer. 
This is normally not // needed, even if the image parameters, colorspace or target configuration // change, since libplacebo will internally detect such circumstances and // invalidate stale caches. Doing this explicitly *may* be useful to ensure // that memory resources associated with old objects are freed; or in case // the user wants to switch to a new file with a different interpretation of // `pl_image.signature`. void pl_renderer_flush_cache(struct pl_renderer *rr); // Represents the options used for rendering. These affect the quality of // the result. struct pl_render_params { // Configures the algorithms used for upscaling and downscaling, // respectively. If left as NULL, then libplacebo will only use inexpensive // sampling (bicubic, bilinear or neareast neighbour depending on the // capabilities of the hardware). // // Note: Setting `downscaler` to NULL also implies `skip_anti_aliasing`, // since the built-in GPU sampling algorithms can't anti-alias. // // Note: If set to the same address as the built-in `pl_filter_bicubic`, // `pl_filter_box` etc.; libplacebo will also use the more efficient // direct sampling algorithm where possible without quality loss. const struct pl_filter_config *upscaler; const struct pl_filter_config *downscaler; // The number of entries for the scaler LUTs. Defaults to 64 if left unset. int lut_entries; // The anti-ringing strength to apply to non-polar filters. See the // equivalent option in `pl_sample_filter_params` for more information. float antiringing_strength; // Configures the algorithm used for frame mixing (when using // `pl_render_image_mix`). Ignored otherwise. As a special requirement, // this must be a filter config with `polar` set to false, since it's only // used for 1D mixing and thus only 1D filters are compatible. If left as // NULL, then libplacebo will use a built-in, inexpensive frame mixing // algorithm. // // It's worth pointing out that this built-in frame mixing can often be // better than any of the available filter configurations. So it's not a // bad idea to leave this as NULL. In fact, that's the recommended default. const struct pl_filter_config *frame_mixer; // Configures the settings used to deband source textures. Leaving this as // NULL disables debanding. const struct pl_deband_params *deband_params; // Configures the settings used to sigmoidize the image before upscaling. // This is not always used. If NULL, disables sigmoidization. const struct pl_sigmoid_params *sigmoid_params; // Configures the color adjustment parameters used to decode the color. // This can be used to apply additional artistic settings such as // desaturation, etc. If NULL, defaults to &pl_color_adjustment_neutral. const struct pl_color_adjustment *color_adjustment; // Configures the settings used to tone map from HDR to SDR, or from higher // gamut to standard gamut content. If NULL, defaults to // `&pl_color_map_default_params`. const struct pl_color_map_params *color_map_params; // Configures the settings used to dither to the output depth. Leaving this // as NULL disables dithering. const struct pl_dither_params *dither_params; // --- Performance / quality trade-off options: // These should generally be left off where quality is desired, as they can // degrade the result quite noticeably; but may be useful for older or // slower hardware. Note that libplacebo will automatically disable // advanced features on hardware where they are unsupported, regardless of // these settings. So only enable them if you need a performance bump. 
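    //
    // For instance (an illustrative sketch, not a recommendation), a caller
    // targeting a slow mobile GPU might start from the defaults and opt into
    // a couple of the trade-offs listed below:
    //
    //     struct pl_render_params params = pl_render_default_params;
    //     params.skip_anti_aliasing = true;
    //     params.disable_overlay_sampling = true;
    //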
    // Disables anti-aliasing on downscaling. This will result in moiré
    // artifacts and nasty, jagged pixels when downscaling, except for some
    // very limited special cases (e.g. bilinear downsampling to exactly 0.5x).
    //
    // Significantly speeds up downscaling with high downscaling ratios.
    bool skip_anti_aliasing;

    // Cutoff value for polar sampling. See the equivalent option in
    // `pl_sample_filter_params` for more information.
    float polar_cutoff;

    // Skips dispatching the high-quality scalers for overlay textures, and
    // always falls back to built-in GPU samplers. Note: The scalers are
    // already disabled if the overlay texture does not need to be scaled.
    bool disable_overlay_sampling;

    // --- Performance tuning / debugging options
    // These may affect performance or may make debugging problems easier,
    // but shouldn't have any effect on the quality.

    // Disables the use of a redraw cache. Normally, when rendering the same
    // frame multiple times (as identified via pl_image.signature), libplacebo
    // will try to skip redraws by using a cache of results. However, in some
    // circumstances, such as when the user knows that there will be no or
    // infrequent redraws, or when the user can't come up with meaningful
    // `signature` values, this field allows disabling the use of a cache.
    //
    // It's worth pointing out that the user can toggle this field on and off
    // at any point in time, even on subsequent frames. Setting it simply
    // means that libplacebo will act as if the cache didn't exist; it will
    // not be read from, written to, or updated.
    //
    // It's also worth pointing out that this option being `false` does not
    // guarantee the use of a redraw cache. It will be implicitly disabled, for
    // example, if the hardware does not support the required features
    // (typically the presence of blittable texture formats).
    bool skip_redraw_caching;

    // Disables linearization / sigmoidization before scaling. This might be
    // useful when tracking down unexpected image artifacts or excessive
    // ringing, but it shouldn't normally be necessary.
    bool disable_linear_scaling;

    // Forces the use of the "general" scaling algorithms even when using the
    // special-cased built-in presets like `pl_filter_bicubic`. Basically, this
    // disables the more efficient implementations in favor of the slower,
    // general-purpose ones.
    bool disable_builtin_scalers;
};

// This contains the default/recommended options for reasonable image quality,
// while also not being too terribly slow. All of the *_params structs
// are defaulted to the corresponding *_default_params.
extern const struct pl_render_params pl_render_default_params;

#define PL_MAX_PLANES 4

// High-level description of a single slice of an image. This basically
// represents a single 2D plane, with any number of components.
struct pl_plane {
    // The texture underlying this plane. The texture must be 2D, and
    // `texture->params.sampleable` must be true.
    const struct pl_tex *texture;

    // Describes the number and interpretation of the components in this plane.
    // This defines the mapping from component index to the canonical component
    // order (RGBA, YCbCrA or XYZA). It's worth pointing out that this is
    // completely separate from `texture->format.sample_order`. The latter is
    // essentially irrelevant/transparent for the API user, since it just
    // determines which order the texture data shows up as inside the GLSL
    // shader; whereas this field controls the actual meaning of the component.
// // Example; if the user has a plane with just {Y} and a plane with just // {Cb Cr}, and a GPU that only supports bgra formats, you would still // specify the component mapping as {0} and {1 2} respectively, even though // the GPU is sampling the data in the order BGRA. Use -1 for "ignored" // components. int components; // number of relevant components int component_mapping[4]; // semantic index of each component // Controls the sample offset, relative to the "reference" dimensions. For // an example of what to set here, see `pl_chroma_location_offset`. Note // that this is given in unit of reference pixels. For a graphical example, // imagine you have a 2x2 image with a 1x1 (subsampled) plane. Without any // shift (0.0), the situation looks like this: // // X-------X X = reference pixel // | | P = plane pixel // | P | // | | // X-------X // // For 4:2:0 subsampling, this corresponds to PL_CHROMA_CENTER. If the // shift_x was instead set to -0.5, the `P` pixel would be offset to the // left by half the separation between the reference (`X` pixels), resulting // in the following: // // X-------X X = reference pixel // | | P = plane pixel // P | // | | // X-------X // // For 4:2:0 subsampling, this corresponds to PL_CHROMA_LEFT. // // Note: It's recommended to fill this using `pl_chroma_location_offset` on // the chroma planes. float shift_x, shift_y; }; enum pl_overlay_mode { PL_OVERLAY_NORMAL = 0, // treat the texture as a normal, full-color texture PL_OVERLAY_MONOCHROME, // treat the texture as a single-component alpha map }; // A struct representing an image overlay (e.g. for subtitles or on-screen // status messages, controls, ...) struct pl_overlay { // The plane to overlay. Multi-plane overlays are not supported. If // necessary, multiple planes can be combined by treating them as separate // overlays with different base colors. // // Note: shift_x/y are simply treated as a uniform sampling offset. struct pl_plane plane; // The (absolute) coordinates at which to render this overlay texture. May // be flipped, and partially or wholly outside the image. If the size does // not exactly match the texture, it will be scaled/stretched to fit. struct pl_rect2d rect; // This controls the coloring mode of this overlay. enum pl_overlay_mode mode; // If `mode` is PL_OVERLAY_MONOCHROME, then the texture is treated as an // alpha map and multiplied by this base color. Ignored for the other modes. float base_color[3]; // This controls the colorspace information for this overlay. The contents // of the texture / the value of `color` are interpreted according to this. struct pl_color_repr repr; struct pl_color_space color; }; // High-level description of a source image to render struct pl_image { // A generic signature uniquely identifying this image. The contents don't // matter, as long as they're unique for "identical" frames. This signature // is used to cache intermediate results, thus speeding up redraws. // In practice, the user might set this to e.g. an incrementing counter. // // If the user can't ensure the uniqueness of this signature for whatever // reason, they must set `pl_render_params.skip_redraw_caching`, in which // case the contents of this field are ignored. // // NOTE: Re-using the same `signature` also requires that the contents of // the planes (plane[i].texture) as well as any overlays has not changed // since the previous usage. 
In other words, touching the texture in any // way using the pl_tex_* APIs and then trying to re-use them for the same // signature, or trying to re-use the same signature with different // textures, is undefined behavior. (It's the *contents* that matter here, // the actual texture object can be a different one, as long as the // contents and parameters are the same) uint64_t signature; // Each frame is split up into some number of planes, each of which may // carry several components and be of any size / offset. int num_planes; struct pl_plane planes[PL_MAX_PLANES]; // Color representation / encoding / semantics associated with this image struct pl_color_repr repr; struct pl_color_space color; // The reference dimensions of this image. For typical content, this is the // dimensions of the largest (non-subsampled) plane, e.g. luma. Note that // for anamorphic content, this is the size of the texture itself, not the // "nominal" size of the video. (Anamorphic pixel ratio conversions are // done implicitly by differing the aspect ratio between `src_rect` and // `dst_rect`) int width; int height; // The source rectangle which we want to render from, relative to the // reference dimensions. Pixels outside of this rectangle will ostensibly // be ignored, but note that they may still contribute to the output data // due to the effects of texture filtering. `src_rect` may be flipped, and // may be partially or wholly outside the bounds of the texture. (Optional) struct pl_rect2df src_rect; // A list of additional overlays to render directly on top of this image. // These overlays will be treated as though they were part of the image, // which means they will be affected by the main scaler as well as by // frame mixing algorithms. See also `pl_target.overlays` const struct pl_overlay *overlays; int num_overlays; // Note on subsampling and plane correspondence: All planes belonging to // the same image will only be streched by an integer multiple (or inverse // thereof) in order to match the reference dimensions of this image. For // example, suppose you have an 8x4 image. A valid plane scaling would be // 4x2 -> 8x4 or 4x4 -> 4x4, but not 6x4 -> 8x4. So if a 6x4 plane is // given, then it would be treated like a cropped 8x4 plane (since 1.0 is // the closest scaling ratio to the actual ratio of 1.3). // // For an explanation of why this makes sense, consider the relatively // common example of a subsampled, oddly sized (e.g. jpeg) image. In such // cases, for example a 35x23 image, the 4:2:0 subsampled chroma plane // would have to end up as 17.5x11.5, which gets rounded up to 18x12 by // implementations. So in this example, the 18x12 chroma plane would get // treated by libplacebo as an oversized chroma plane - i.e. the plane // would get sampled as if it was 17.5 pixels wide and 11.5 pixels large. }; // Represents the target of a rendering operation struct pl_render_target { // The framebuffer (or texture) we want to render to. Must have `renderable` // set. The other capabilities are optional, but in particular `storable` // and `blittable` can help boost performance if available. const struct pl_tex *fbo; // The destination rectangle which we want to render into. If this is // larger or smaller than the src_rect, or if the aspect ratio is // different, scaling will occur. `dst_rect` may be flipped, and may be // partially or wholly outside the bounds of the fbo. (Optional) struct pl_rect2d dst_rect; // The color representation and space of the output. 
If this does not match // the color space of the source, libplacebo will convert the colors // automatically. struct pl_color_repr repr; struct pl_color_space color; // A list of additional overlays to render directly onto the output. These // overlays will be rendered after the image itself has been fully scaled // and output, and will not be affected by e.g. frame mixing. See also // `pl_image.overlays` const struct pl_overlay *overlays; int num_overlays; }; // Fills in a pl_render_target based on a swapchain frame's FBO and metadata. void pl_render_target_from_swapchain(struct pl_render_target *out_target, const struct pl_swapchain_frame *frame); // Render a single image to a target using the given parameters. This is // fully dynamic, i.e. the params can change at any time. libplacebo will // internally detect and flush whatever caches are invalidated as a result of // changing colorspace, size etc. bool pl_render_image(struct pl_renderer *rr, const struct pl_image *image, const struct pl_render_target *target, const struct pl_render_params *params); /* TODO // Represents a mixture of input images, distributed temporally struct pl_image_mix { // The number of images in this mixture. The number of images should be // sufficient to meet the needs of the configured frame mixer. See the // section below for more information. int num_images; // A list of the images themselves. The images can have different // colorspaces, configurations of planes, or even sizes. Note: when using // frame mixing, it's absolutely critical that all of the images have // a unique value of `pl_image.signature`. struct pl_image *images; // A list of relative distance vectors for each image, respectively. // Basically, the "current" instant is always assigned a position of 0.0; // and this distances array will give the relative offset (either negative // or positive) of the images in the mixture. The values are expected to be // normalized such that a separation of 1.0 corresponds to roughly one // nominal source frame duration. So a constant framerate video file will // always have distances like e.g. {-2.3, -1.3, -0.3, 0.7, 1.7, 2.7}, using // an example radius of 3. // // In cases where the framerate is variable (e.g. VFR video), the choice of // what to scale to use can be difficult to answer. A typical choice would // be either to use the canonical (container-tagged) framerate, or the // highest momentary framerate, as a reference. float *distances; // The duration for which the resulting image will be held, using the same // scale as the `distance`. This duration is centered around the instant // 0.0. Basically, the image is assumed to be displayed from the time // -vsync_duration/2 up to the time vsync_duration/2. float vsync_duration; // Explanation of the frame mixing radius: The algorithm chosen in // `pl_render_params.frame_mixing` has a canonical radius equal to // `pl_filter_config.kernel->radius`. This means that the frame mixing // algorithm will (only) need to consult all of the frames that have a // distance within the interval [-radius, radius]. As such, the user should // include all such frames in `images`, but may prune or omit frames that // lie outside it. // // The built-in frame mixing (`pl_render_params.frame_mixing == NULL`) has // a canonical radius equal to vsync_duration/2. }; // Render a mixture of images to the target using the given parameters. 
This // functions much like a generalization of `pl_render_image`, for when the API // user has more control over the frame queue / vsync timings and can present a // complete picture of the current instant's neighbourhood. This allows // libplacebo to use frame blending in order to eliminate judder artifacts // typically associated with source/display frame rate mismatch. // // In particular, pl_render_image can be semantically viewed as a special case // of pl_render_image_mix, where num_images = 1, that frame's distance is 0.0, // and the vsync_duration is 0.0. (But using `pl_render_image` instead of // `pl_render_image_mix` in such an example can still be more efficient) bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_image_mix *mix, const struct pl_render_target *target, const struct pl_render_params *params); */ #endif // LIBPLACEBO_RENDERER_H_ libplacebo-0.4.0/src/include/libplacebo/shaders.h000066400000000000000000000177441324021332500217270ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_H_ #define LIBPLACEBO_SHADERS_H_ // This function defines the "direct" interface to libplacebo's GLSL shaders, // suitable for use in contexts where the user controls GLSL shader compilation // but wishes to include functions generated by libplacebo as part of their // own rendering process. This API is normally not used for operation with // libplacebo's higher-level constructs such as `pl_dispatch` or `pl_renderer`. #include struct pl_shader; // Creates a new, blank, mutable pl_shader object. The resulting pl_shader s // implicitly destroyed when the pl_context is destroyed. // // If `gpu` is non-NULL, then this `gpu` will be used to create objects such as // textures and buffers, or check for required capabilities, for operations // which depend on either of those. This is fully optional, i.e. these GLSL // primitives are designed to be used without a dependency on `gpu` wherever // possible - however, some features may not work, and will be disabled even // if requested. // // The `ident` represents some arbitrary value that identifies this pl_shader. // The semantics of the identifier work like a "namespace". This parameter is // only relevant if you plan on merging multiple shaders together, which // requires that all of the merged shaders have unique identifiers. It can // safely be left as 0 if unneeded. // // The `index` represents an abstract frame index, which shaders may use // internally to do things like temporal dithering or seeding PRNGs. If the // user does not care about temporal dithering/debanding, or wants determinstic // rendering, this may safely be left as 0. Otherwise, it should be incremented // by 1 on successive frames. struct pl_shader *pl_shader_alloc(struct pl_context *ctx, const struct pl_gpu *gpu, uint8_t ident, uint8_t index); // Frees a pl_shader and all resources associated with it. 
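//
// Illustrative lifecycle sketch (assuming `ctx`, `gpu` and a sampleable
// `tex` already exist; pl_shader_sample_direct is declared in
// shaders/sampling.h, and pl_shader_finalize further below):
//
//     struct pl_shader *sh = pl_shader_alloc(ctx, gpu, 0, 0);
//     pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = tex });
//     const struct pl_shader_res *res = pl_shader_finalize(sh);
//     // ... feed res->glsl and its inputs into your own GLSL pipeline ...
//     pl_shader_free(&sh);
//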
void pl_shader_free(struct pl_shader **sh); // Resets a pl_shader to a blank slate, without releasing internal memory. // If you're going to be re-generating shaders often, this function will let // you skip the re-allocation overhead. void pl_shader_reset(struct pl_shader *sh, uint8_t ident, uint8_t index); // Returns whether or not a pl_shader needs to be run as a compute shader. This // will never be the case unless the `gpu` this pl_shader was created against // supports PL_GPU_CAP_COMPUTE. bool pl_shader_is_compute(const struct pl_shader *sh); // Returns whether or not the shader has any particular output size // requirements. Some shaders, in particular those that sample from other // textures, have specific output size requirements which need to be respected // by the caller. If this is false, then the shader is compatible with every // output size. If true, the size requirements are stored into *w and *h. bool pl_shader_output_size(const struct pl_shader *sh, int *w, int *h); // Returns a signature (like a hash, or checksum) of a shader. This is a // collision-resistant number identifying the internal state of a pl_shader. // Two pl_shaders will only have the same signature if they are compatible. // Compatibility in this context means that they differ only in the contents // of variables, vertex attributes or descriptor bindings. The structure, // shader text and number/names of input variables/descriptors/attributes must // be the same. Note that computing this function takes some time, so the // results should be re-used where possible. uint64_t pl_shader_signature(const struct pl_shader *sh); // Indicates the type of signature that is associated with a shader result. // Every shader result defines a function that may be called by the user, and // this enum indicates the type of value that this function takes and/or // returns. // // Which signature a shader ends up with depends on the type of operation being // performed by a shader fragment, as determined by the user's calls. See below // for more information. enum pl_shader_sig { PL_SHADER_SIG_NONE = 0, // no input / void output PL_SHADER_SIG_COLOR, // vec4 color (normalized so that 1.0 is the ref white) }; // Represents a finalized shader fragment. This is not a complete shader, but a // collection of raw shader text together with description of the input // attributes, variables and vertexes it expects to be available. struct pl_shader_res { // The shader text, as literal GLSL. This will always be a function // definition, such that the the function with the indicated name and // signature may be called by the user. const char *glsl; const char *name; enum pl_shader_sig input; // what the function expects enum pl_shader_sig output; // what the function returns // For compute shaders (pl_shader_is_compute), this indicates the requested // work group size. Otherwise, both fields are 0. The interpretation of // these work groups is that they're tiled across the output image. int compute_group_size[2]; // If this pass is a compute shader, this field indicates the shared memory // size requirements for this shader pass. size_t compute_shmem; // A set of input vertex attributes needed by this shader fragment. struct pl_shader_va *vertex_attribs; int num_vertex_attribs; // A set of input variables needed by this shader fragment. 
struct pl_shader_var *variables; int num_variables; // A list of input descriptors needed by this shader fragment, struct pl_shader_desc *descriptors; int num_descriptors; }; // Represents a vertex attribute. The four values will be bound to the four // corner vertices respectively, in row-wise order starting from the top left: // data[0] data[1] // data[2] data[3] struct pl_shader_va { struct pl_vertex_attrib attr; // VA type, excluding `offset` and `location` const void *data[4]; }; // Represents a bound shared variable / descriptor struct pl_shader_var { struct pl_var var; // the underlying variable description const void *data; // the raw data (interpretation as with pl_var_update) bool dynamic; // if true, the value is expected to change frequently }; struct pl_shader_desc { struct pl_desc desc; // descriptor type, excluding `binding` const void *object; // the object being bound (as for pl_desc_binding) }; // Finalize a pl_shader. It is no longer mutable at this point, and any further // attempts to modify it result in an error. (Functions which take a const // struct pl_shader * argument do not modify the shader and may be freely // called on an already-finalized shader) // // The returned pl_shader_res is bound to the lifetime of the pl_shader - and // will only remain valid until the pl_shader is freed or reset. const struct pl_shader_res *pl_shader_finalize(struct pl_shader *sh); // Shader objects represent abstract resources that shaders need to manage in // order to ensure their operation. This could include shader storage buffers, // generated lookup textures, or other sorts of configured state. The body // of a shader object is fully opaque; but the user is in charge of cleaning up // after them and passing them to the right shader passes. // // Note: pl_shader_obj pointers must be initialized to NULL by the caller. struct pl_shader_obj; void pl_shader_obj_destroy(struct pl_shader_obj **obj); #endif // LIBPLACEBO_SHADERS_H_ libplacebo-0.4.0/src/include/libplacebo/shaders/000077500000000000000000000000001324021332500215415ustar00rootroot00000000000000libplacebo-0.4.0/src/include/libplacebo/shaders/colorspace.h000066400000000000000000000320511324021332500240450ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_COLORSPACE_H_ #define LIBPLACEBO_SHADERS_COLORSPACE_H_ // Color space transformation shaders. These all input and output a color // value (PL_SHADER_SIG_COLOR). #include #include #include // Decode the color into normalized RGB, given a specified color_repr. This // also takes care of additional pre- and post-conversions requires for the // "special" color systems (XYZ, BT.2020-C, etc.). If `params` is left as NULL, // it defaults to &pl_color_adjustment_neutral. // // Note: This function always returns PC-range RGB with pre-multiplied alpha. // It mutates the pl_color_repr to reflect the change. 
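//
// Example sketch (`sh` and `image` are assumed to already exist, `image`
// being a struct pl_image): decoding with neutral color adjustments by
// passing NULL for `params`:
//
//     struct pl_color_repr repr = image->repr;
//     pl_shader_decode_color(sh, &repr, NULL);
//     // `repr` now reflects the decoded (PC-range RGB) representation
//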
void pl_shader_decode_color(struct pl_shader *sh, struct pl_color_repr *repr, const struct pl_color_adjustment *params); // Encodes a color from normalized, PC-range, pre-multiplied RGB into a given // representation. That is, this performs the inverse operation of // `pl_shader_decode_color` (sans color adjustments). void pl_shader_encode_color(struct pl_shader *sh, const struct pl_color_repr *repr); // Linearize (expand) `vec4 color`, given a specified color_transfer. In // essence, this is the ITU-R EOTF, calculated on an idealized (reference) // monitor with a white point of PL_COLOR_REF_WHITE and infinite contrast. void pl_shader_linearize(struct pl_shader *sh, enum pl_color_transfer trc); // Delinearize (compress), given a TRC as output. This corresponds to the // inverse EOTF (not the OETF) in ITU-R terminology, again assuming a // reference monitor. void pl_shader_delinearize(struct pl_shader *sh, enum pl_color_transfer trc); struct pl_sigmoid_params { // The center (bias) of the sigmoid curve. Must be between 0.0 and 1.0. // If left as NULL, defaults to 0.75 float center; // The slope (steepness) of the sigmoid curve. Must be between 1.0 and 20.0. // If left as NULL, defaults to 6.5. float slope; }; extern const struct pl_sigmoid_params pl_sigmoid_default_params; // Applies a sigmoidal color transform to all channels. This helps avoid // ringing artifacts during upscaling by bringing the color information closer // to neutral and away from the extremes. If `params` is NULL, it defaults to // &pl_sigmoid_default_params. // // Warning: This function clamps the input to the interval [0,1]; and as such // it should *NOT* be used on already-decoded high-dynamic range content. void pl_shader_sigmoidize(struct pl_shader *sh, const struct pl_sigmoid_params *params); // This performs the inverse operation to `pl_shader_sigmoidize`. void pl_shader_unsigmoidize(struct pl_shader *sh, const struct pl_sigmoid_params *params); // A collection of various tone mapping algorithms supported by libplacebo. enum pl_tone_mapping_algorithm { // Performs no tone-mapping, just clips out-of-gamut colors. Retains perfect // color accuracy for in-gamut colors but completely destroys out-of-gamut // information. PL_TONE_MAPPING_CLIP, // Generalization of the reinhard tone mapping algorithm to support an // additional linear slope near black. The tone mapping parameter indicates // the trade-off between the linear section and the non-linear section. // Essentially, for param=0.5, every color value below 0.5 will be mapped // linearly, with the higher values being non-linearly tone mapped. Values // near 1.0 make this curve behave like CLIP, and values near 0.0 make this // curve behave like REINHARD. The default value is 0.3, which provides a // good balance between colorimetric accuracy and preserving out-of-gamut // details. The name is derived from its function shape (ax+b)/(cx+d), which // is known as a Möbius transformation in mathematics. PL_TONE_MAPPING_MOBIUS, // Simple non-linear, global tone mapping algorithm. Named after Erik // Reinhard. The parameter specifies the local contrast coefficient at the // display peak. Essentially, a value of param=0.5 implies that the // reference white will be about half as bright as when clipping. Defaults // to 0.5, which results in the simplest formulation of this function. PL_TONE_MAPPING_REINHARD, // Piece-wise, filmic tone-mapping algorithm developed by John Hable for // use in Uncharted 2, inspired by a similar tone-mapping algorithm used by // Kodak. 
Popularized by its use in video games with HDR rendering. // Preserves both dark and bright details very well, but comes with the // drawback of darkening the overall image quite significantly. Users are // recommended to use HDR peak detection to compensate for the missing // brightness. This is sort of similar to REINHARD tone-mapping + parameter // 0.24. PL_TONE_MAPPING_HABLE, // Fits a gamma (power) function to transfer between the source and target // color spaces. This preserves details at all scales fairly accurately, // but can result in an image with a muted or dull appearance. Best when // combined with peak detection. The parameter is used as the exponent of // the gamma function, defaulting to 1.8. PL_TONE_MAPPING_GAMMA, // Linearly stretches the source gamut to the destination gamut. This will // preserve all details accurately, but results in a significantly darker // image. Best when combined with peak detection. The parameter can be used // as an aditional scaling coefficient to make the image (linearly) // brighter or darker. Defaults to 1.0. PL_TONE_MAPPING_LINEAR, }; struct pl_color_map_params { // The rendering intent to use for RGB->RGB primary conversions. // Defaults to PL_INTENT_RELATIVE_COLORIMETRIC. enum pl_rendering_intent intent; // Algorithm and configuration used for tone-mapping. For non-tunable // algorithms, the `param` is ignored. If the tone mapping parameter is // left as 0.0, the tone-mapping curve's preferred default parameter will // be used. The default algorithm is PL_TONE_MAPPING_HABLE. enum pl_tone_mapping_algorithm tone_mapping_algo; float tone_mapping_param; // Desaturation coefficient. This essentially desaturates very bright // spectral colors towards white, resulting in a more natural-looking // depiction of very bright sunlit regions or images of the sunlit sky. The // coefficient indicates the strength of the desaturation - higher values // desaturate more strongly. The default value is 0.5, which is fairly // conservative - due in part to the excessive use of extremely bright // scenes in badly mastered HDR content. Using a value of 1.0 makes it // approximately match the desaturation strength used by the ACES ODT. A // setting of 0.0 disables this. float tone_mapping_desaturate; // If true, enables the gamut warning feature. This will visibly highlight // all out-of-gamut colors (by inverting them), if they would have been // clipped as a result of gamut/tone mapping. (Obviously, this feature only // really makes sense with TONE_MAPPING_CLIP) bool gamut_warning; // If set to something nonzero, this enables the peak detection feature. // Controls how many frames to smooth (average) the results over, in order // to prevent jitter due to sparkling highlights. Defaults to 63. int peak_detect_frames; // When using peak detection, setting this to a nonzero value enables // scene change detection. If the current frame's average brightness // differs from the averaged frame brightness of the previous frames by // this much or more, the averaged value will be discarded and the state // reset. Doing so helps prevent annoying "eye adaptation"-like effects // when transitioning between dark and bright scenes. Defaults to 0.2. float scene_threshold; }; extern const struct pl_color_map_params pl_color_map_default_params; // Maps `vec4 color` from one color space to another color space according // to the parameters (described in greater depth above). If `params` is left // as NULL, it defaults to &pl_color_map_default_params. 
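//
// A caller who wants to deviate from the defaults might, for example
// (illustrative sketch), only adjust the tone mapping behaviour:
//
//     struct pl_color_map_params pars = pl_color_map_default_params;
//     pars.tone_mapping_algo = PL_TONE_MAPPING_MOBIUS;
//     pars.tone_mapping_param = 0.3f;
//     pars.tone_mapping_desaturate = 1.0f;
//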
If `prelinearized` // is true, the logic will assume the input has already been linearized by the // caller (e.g. as part of a previous linear light scaling operation). // // When the user wishes to use peak detection, `peak_detect_state` should be // set to the pointer of an object that will hold the state for the frame // averaging, which must be destroyed by the user when no longer required. // Successive calls to the same shader should re-use the same object. May // be safely left as NULL, which will disable the peak detection feature. // // Note: Due to the nature of the peak detection implementation, the detected // metadata is delayed by one frame. This may cause a single frame of wrong // metadata on rapid scene transitions, or following the start of playback. void pl_shader_color_map(struct pl_shader *sh, const struct pl_color_map_params *params, struct pl_color_space src, struct pl_color_space dst, struct pl_shader_obj **peak_detect_state, bool prelinearized); enum pl_dither_method { // Dither with blue noise. Very high quality, but requires the use of a // LUT. Warning: Computing a blue noise texture with a large size can be // very slow, however this only needs to be performed once. Even so, using // this with a `lut_size` greater than 6 is generally ill-advised. This is // the preferred/default dither method. PL_DITHER_BLUE_NOISE, // Dither with an ordered (bayer) dither matrix, using a LUT. Low quality, // and since this also uses a LUT, there's generally no advantage to picking // this instead of `PL_DITHER_BLUE_NOISE`. It's mainly there for testing. PL_DITHER_ORDERED_LUT, // The same as `PL_DITHER_ORDERED_LUT`, but uses fixed function math instead // of a LUT. This is faster, but only supports a fixed dither matrix size // of 16x16 (equal to a `lut_size` of 4). Requires GLSL 130+. PL_DITHER_ORDERED_FIXED, // Dither with white noise. This does not require a LUT and is fairly cheap // to compute. Unlike the other modes it doesn't show any repeating // patterns either spatially or temporally, but the downside is that this // is visually fairly jarring due to the presence of low frequencies in the // noise spectrum. Used as a fallback when the above methods are not // available. PL_DITHER_WHITE_NOISE, }; struct pl_dither_params { // The source of the dither noise to use. enum pl_dither_method method; // For the dither methods which require the use of a LUT, this controls // the size of the LUT (base 2). If left as NULL, this defaults to 6, which // is equivalent to a 64x64 dither matrix. Must not be larger than 8. int lut_size; // Enables temporal dithering. This reduces the persistence of dithering // artifacts by perturbing the dithering matrix per frame. // Warning: This can cause nasty aliasing artifacts on some LCD screens. bool temporal; }; extern const struct pl_dither_params pl_dither_default_params; // Dither the colors to a lower depth, given in bits. This can be used on input // colors of any precision. Basically, this rounds the colors to only linear // multiples of the stated bit depth. The average intensity of the result // will not change (i.e., the dither noise is balanced in both directions). // If `params` is NULL, it defaults to &pl_dither_default_params. // // For the dither methods which require the use of a LUT, `dither_state` must // be set to a valid pointer. To avoid thrashing the resource, users should // avoid trying to re-use the same LUT for different dither configurations. 
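//
// Typical usage sketch (assuming an 8-bit output; `dither_state` is a
// caller-owned, NULL-initialized pointer kept alive across frames):
//
//     struct pl_shader_obj *dither_state = NULL;
//     ...
//     pl_shader_dither(sh, 8, &dither_state, NULL);
//     ...
//     pl_shader_obj_destroy(&dither_state);   // on shutdown
//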
If // passed as NULL, libplacebo will automatically fall back to dither algorithms // that don't require the use of a LUT. // // Warning: This dithering algorithm is not gamma-invariant; so using it for // very low bit depths (below 4 or so) will noticeably increase the brightness // of the resulting image. When doing low bit depth dithering for aesthetic // purposes, it's recommended that the user explicitly (de)linearize the colors // before and after this algorithm. void pl_shader_dither(struct pl_shader *sh, int new_depth, struct pl_shader_obj **dither_state, const struct pl_dither_params *params); #endif // LIBPLACEBO_SHADERS_COLORSPACE_H_ libplacebo-0.4.0/src/include/libplacebo/shaders/sampling.h000066400000000000000000000152111324021332500235240ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_SAMPLING_H_ #define LIBPLACEBO_SHADERS_SAMPLING_H_ // Sampling operations. These shaders perform some form of sampling operation // from a given pl_tex. In order to use these, the pl_shader *must* have been // created using the same `ra` as the originating `pl_tex`. Otherwise, this // is undefined behavior. They require nothing (PL_SHADER_SIG_NONE) and return // a color (PL_SHADER_SIG_COLOR). #include #include struct pl_deband_params { // The number of debanding steps to perform per sample. Each step reduces a // bit more banding, but takes time to compute. Note that the strength of // each step falls off very quickly, so high numbers (>4) are practically // useless. Defaults to 1. int iterations; // The debanding filter's cut-off threshold. Higher numbers increase the // debanding strength dramatically, but progressively diminish image // details. Defaults to 4.0. float threshold; // The debanding filter's initial radius. The radius increases linearly // for each iteration. A higher radius will find more gradients, but a // lower radius will smooth more aggressively. Defaults to 16.0. float radius; // Add some extra noise to the image. This significantly helps cover up // remaining quantization artifacts. Higher numbers add more noise. // Note: When debanding HDR sources, even a small amount of grain can // result in a very big change to the brightness level. It's recommended to // either scale this value down or disable it entirely for HDR. // // Defaults to 6.0, which is very mild. float grain; }; extern const struct pl_deband_params pl_deband_default_params; // Debands a given texture and returns the sampled color in `vec4 color`. If // `params` is left as NULL, it defaults to &pl_deband_default_params. Note // that `tex->params.sample_mode` must be PL_TEX_SAMPLE_LINEAR. // // Note: This can also be used as a pure grain function, by setting the number // of iterations to 0. 
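//
// For instance (illustrative sketch, with `sh` and a linearly sampleable
// `tex` assumed to exist), applying grain only:
//
//     struct pl_deband_params dbp = pl_deband_default_params;
//     dbp.iterations = 0;
//     dbp.grain = 12.0f;
//     pl_shader_deband(sh, tex, &dbp);
//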
void pl_shader_deband(struct pl_shader *sh, const struct pl_tex *tex, const struct pl_deband_params *params); // Common parameters for sampling operations struct pl_sample_src { const struct pl_tex *tex; // texture to sample struct pl_rect2df rect; // sub-rect to sample from (optional) int components; // number of components to sample (optional) int new_w, new_h; // dimensions of the resulting output (optional) }; // Performs direct / native texture sampling. This uses whatever built-in GPU // sampling is built into the GPU and specified using src->params.sample_mode. // // Note: This is generally very low quality and should be avoided if possible, // for both upscaling and downscaling. The only exception to this rule of thumb // is exact 2x downscaling with PL_TEX_SAMPLE_LINEAR, as well as integer // upscaling with PL_TEX_SAMPLE_NEAREST. bool pl_shader_sample_direct(struct pl_shader *sh, const struct pl_sample_src *src); // Performs hardware-accelerated / efficient bicubic sampling. This is more // efficient than using the generalized sampling routines and // pl_filter_function_bicubic. Requires the source texture to be set up with // sample_mode PL_TEX_SAMPLE_LINEAR. Only works well when upscaling - avoid // for downscaling. bool pl_shader_sample_bicubic(struct pl_shader *sh, const struct pl_sample_src *src); struct pl_sample_filter_params { // The filter to use for sampling. struct pl_filter_config filter; // The precision of the LUT. Defaults to 64 if unspecified. int lut_entries; // See `pl_filter_params.cutoff`. Defaults to 0.001 if unspecified. Only // relevant for polar filters. float cutoff; // Antiringing strength. A value of 0.0 disables antiringing, and a value // of 1.0 enables full-strength antiringing. Defaults to 0.0 if // unspecified. Only relevant for separated/orthogonal filters. float antiring; // Disable the use of compute shaders (e.g. if rendering to non-storable tex) bool no_compute; // Disable the use of filter widening / anti-aliasing (for downscaling) bool no_widening; // This shader object is used to store the LUT, and will be recreated // if necessary. To avoid thrashing the resource, users should avoid trying // to re-use the same LUT for different filter configurations or scaling // ratios. Must be set to a valid pointer, and the target NULL-initialized. struct pl_shader_obj **lut; }; // Performs polar sampling. This internally chooses between an optimized compute // shader, and various fragment shaders, depending on the supported GLSL version // and GPU features. Returns whether or not it was successful. // // Note: `params->filter.polar` must be true to use this function. bool pl_shader_sample_polar(struct pl_shader *sh, const struct pl_sample_src *src, const struct pl_sample_filter_params *params); enum { PL_SEP_VERT = 0, PL_SEP_HORIZ, PL_SEP_PASSES }; // Performs orthogonal (1D) sampling. Using this twice in a row (once vertical // and once horizontal) effectively performs a 2D upscale. This is lower // quality than polar sampling, but significantly faster, and therefore the // recommended default. Returns whether or not it was successful. // // 0 <= pass < PL_SEP_PASSES indicates which component of the transformation to // apply. PL_SEP_VERT only applies the vertical component, and PL_SEP_HORIZ // only the horizontal. The non-relevant component of the `src->rect` is ignored // entirely. // // Note: Due to internal limitations, this may currently only be used on 2D // textures - even though the basic principle would work for 1D and 3D textures // as well. 
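//
// Illustrative sketch of a full 2D upscale split into two passes (error
// handling omitted). `sh_v` and `sh_h` are two separate shaders; `src_v`
// samples the original texture, while `src_h` samples the intermediate
// texture that the first (vertical) pass was rendered to. `lut_v` and
// `lut_h` are caller-owned, NULL-initialized pl_shader_obj pointers, and
// pl_filter_bicubic is used here merely as an example of a built-in,
// non-polar filter config:
//
//     struct pl_sample_filter_params fp = {
//         .filter = pl_filter_bicubic,
//         .lut    = &lut_v,
//     };
//     pl_shader_sample_ortho(sh_v, PL_SEP_VERT,  &src_v, &fp);
//     fp.lut = &lut_h;
//     pl_shader_sample_ortho(sh_h, PL_SEP_HORIZ, &src_h, &fp);
//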
bool pl_shader_sample_ortho(struct pl_shader *sh, int pass, const struct pl_sample_src *src, const struct pl_sample_filter_params *params); #endif // LIBPLACEBO_SHADERS_SAMPLING_H_ libplacebo-0.4.0/src/include/libplacebo/swapchain.h000066400000000000000000000140141324021332500222360ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SWAPCHAIN_H_ #define LIBPLACEBO_SWAPCHAIN_H_ #include #include #include // This abstraction represents a low-level interface to visible surfaces // exposed by a graphics API (and accompanying GPU instance), allowing users to // directly present frames to the screen (or window, typically). This is a // sister API to gpu.h and follows the same convention w.r.t undefined behavior. struct pl_swapchain { struct pl_context *ctx; // the pl_context this swapchain was allocated from struct pl_sw_fns *impl; // the underlying implementation const struct pl_gpu *gpu; // the GPU instance this swapchain belongs to void *priv; }; // Destroys this swapchain. May be used at any time, and may block until the // completion of all outstanding rendering commands. The swapchain and any // resources retrieved from it must not be used afterwards. void pl_swapchain_destroy(const struct pl_swapchain **sw); // Returns the approximate current swapchain latency in vsyncs, or 0 if // unknown. A latency of 1 means that `submit_frame` followed by `swap_buffers` // will block until the just-submitted frame has finished rendering. Typical // values are 2 or 3, which enable better pipelining by allowing the GPU to be // processing one or two frames at the same time as the user is preparing the // next for submission. int pl_swapchain_latency(const struct pl_swapchain *sw); // The struct used to hold the results of `pl_swapchain_start_frame` struct pl_swapchain_frame { // A texture representing the framebuffer users should use for rendering. // It's guaranteed that `fbo->params.renderable` will be true, but no other // guarantees are made - not even that `fbo->params.format` is a real format. const struct pl_tex *fbo; // If true, the user should assume that this framebuffer will be flipped // as a result of presenting it on-screen. If false, nothing special needs // to be done - but if true, users should flip the coordinate system of // the `pl_pass` that is rendering to this framebuffer. // // Note: Normally, libplacebo follows the convention that (0,0) represents // the top left of the image/screen. So when flipped is true, this means // (0,0) on this framebuffer gets displayed as the bottom left of the image. bool flipped; // Indicates the color representation this framebuffer will be interpreted // as by the host system / compositor / display, including the bit depth // and alpha handling (where available). struct pl_color_repr color_repr; struct pl_color_space color_space; }; // Retrieve a new frame from the swapchain. Returns whether successful. 
It's // worth noting that this function can fail sporadically for benign reasons, // for example the window being invisible or inaccessible. This function may // block until an image is available, which may be the case if the GPU is // rendering frames significantly faster than the display can output them. It // may also be non-blocking, so users shouldn't rely on this call alone in // order to meter rendering speed. (Specifics depend on the underlying graphics // API) bool pl_swapchain_start_frame(const struct pl_swapchain *sw, struct pl_swapchain_frame *out_frame); // Submits the previously started frame. Non-blocking. This must be issued in // lockstep with pl_swapchain_start_frame - there is no way to start multiple // frames and submit them out-of-order. The frames submitted this way will // generally be made visible in a first-in first-out fashion, although // specifics depend on the mechanism used to create the pl_swapchain. (See the // platform-specific APIs for more info). // // Returns whether successful. This should normally never fail, unless the // GPU/surface has been lost or some other critical error has occurred. bool pl_swapchain_submit_frame(const struct pl_swapchain *sw); // Performs a "buffer swap", or some generalization of the concept. In layman's // terms, this blocks until the execution of the Nth previously submitted frame // has been "made complete" in some sense. (The N derives from the swapchain's // built-in latency. See `pl_swapchain_latency` for more information). // // Users should include this call in their rendering loops in order to make // sure they aren't submitting rendering commands faster than the GPU can // process them, which would potentially lead to a queue overrun or exhaust // memory. // // An example loop might look like this: // // while (rendering) { // struct pl_swapchain_frame frame; // bool ok = pl_swapchain_start_frame(swapchain, &frame); // if (!ok) { // /* wait some time, or decide to stop rendering */ // continue; // } // // /* do some rendering with frame.fbo */ // // ok = pl_swapchain_submit_frame(swapchain); // if (!ok) // break; // // pl_swapchain_swap_buffers(swapchain); // } // // The duration this function blocks for, if at all, may be very inconsistent // and should not be used as an authoritative source of vsync timing // information without sufficient smoothing/filtering (and if so, the time that // `start_frame` blocked for should also be included). void pl_swapchain_swap_buffers(const struct pl_swapchain *sw); #endif // LIBPLACEBO_SWAPCHAIN_H_ libplacebo-0.4.0/src/include/libplacebo/utils/000077500000000000000000000000001324021332500212505ustar00rootroot00000000000000libplacebo-0.4.0/src/include/libplacebo/utils/upload.h000066400000000000000000000124351324021332500227120ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include #ifndef LIBPLACEBO_UPLOAD_H_ #define LIBPLACEBO_UPLOAD_H_ // This file contains a utility function to assist in uploading data from host // memory to a texture. In particular, the texture will be suitable for use as // a `pl_plane`. // Description of the host representation of an image plane struct pl_plane_data { enum pl_fmt_type type; // meaning of the data (must not be UINT or SINT) int width, height; // dimensions of the plane int component_size[4]; // size in bits of each coordinate int component_pad[4]; // ignored bits preceding each component int component_map[4]; // semantic meaning of each component (pixel order) size_t pixel_stride; // offset in bytes between pixels (required) size_t row_stride; // offset in bytes between rows (optional) const void *pixels; // the actual data underlying this plane // Note: When using this together with `pl_image`, there is some amount of // overlap between `component_pad` and `pl_color_repr.bits`. Some key // differences between the two: // // - the bits from `component_pad` are ignored; whereas the superfluous bits // in a `pl_color_repr` must be 0. // - the `component_pad` exists to align the component size and placement // with the capabilities of GPUs; the `pl_color_repr` exists to control // the semantics of the color samples on a finer granularity. // - the `pl_color_repr` applies to the color sample as a whole, and // therefore applies to all planes; the `component_pad` can be different // for each plane. // - `component_pad` interacts with float textures by moving the actual // float in memory. `pl_color_repr` interacts with float data as if // the float was converted from an integer under full range semantics. // // To help establish the motivating difference, a typical example of a use // case would be yuv420p10. Since 10-bit GPU texture support is limited, // and working with non-byte-aligned pixels is awkward in general, the // convention is to represent yuv420p10 as 16-bit samples with either the // high or low bits set to 0. In this scenario, the `component_size` of the // `pl_plane_repr` and `pl_bit_encoding.sample_depth` would be 16, while // the `pl_bit_encoding.color_depth` would be 10 (and additionally, the // `pl_bit_encoding.bit_shift` would be either 0 or 6, depending on // whether the low or the high bits are used). // // On the contrary, something like a packed, 8-bit XBGR format (where the // X bits are ignored and may contain garbage) would set `component_pad[0]` // to 8, and the component_size[0:2] (respectively) to 8 as well. // // As a general rule of thumb, for maximum compatibility, you should try // and align component_size/component_pad to multiples of 8 and explicitly // clear any remaining superfluous bits (+ use `pl_color_repr.bits` to // ensure they're decoded correctly). You should also try to align the // `pixel_stride` to a power of two. }; // Fills in the `component_size`, `component_pad` and `component_map` fields // based on the supplied mask for each component (in semantic order, i.e. // RGBA). If `mask` does not have a contiguous range of set bits, then the // result is undefined and probably not useful. void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4]); // Helper function to find a suitable `pl_fmt` based on a pl_plane_data's // requirements. 
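//
// For illustration, a packed, interleaved 8-bit RGB image (tightly packed,
// no row padding) could be described as follows; `w`, `h`, `rgb_pixels` and
// `gpu` are assumed to be provided by the caller, and PL_FMT_UNORM refers
// to the normalized unsigned integer format type from gpu.h:
//
//     struct pl_plane_data data = {
//         .type           = PL_FMT_UNORM,
//         .width          = w,
//         .height         = h,
//         .component_size = {8, 8, 8},
//         .component_map  = {0, 1, 2},    // R, G, B
//         .pixel_stride   = 3,
//         .pixels         = rgb_pixels,
//     };
//
//     int out_map[4];
//     if (!pl_plane_find_fmt(gpu, out_map, &data)) {
//         // no compatible texture format on this GPU - handle the error
//     }
//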
This is called internally by `pl_upload_plane`, but it's // exposed to users both as a convenience and so they may pre-emptively check // if a format would be supported without actually having to attempt the upload. const struct pl_fmt *pl_plane_find_fmt(const struct pl_gpu *gpu, int out_map[4], const struct pl_plane_data *data); // Upload an image plane to a texture, and update the resulting `pl_plane` // struct. The `plane->texture` will be destroyed and reinitialized if it // does not already exist or is incompatible. Returns whether successful. // // The resulting texture is guaranteed to be `sampleable`, and it will also try // and maximize compatibility with the other `pl_renderer` requirements // (blittable, linear filterable, etc.). // // Important: The user must (eventually) destroy `plane->texture` before // discarding the struct, even if this function returns false! bool pl_upload_plane(const struct pl_gpu *gpu, struct pl_plane *plane, const struct pl_plane_data *data); #endif // LIBPLACEBO_UPLOAD_H_ libplacebo-0.4.0/src/include/libplacebo/vulkan.h000066400000000000000000000175241324021332500215720ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_VULKAN_H_ #define LIBPLACEBO_VULKAN_H_ #include #include #include // Structure representing a VkInstance. Using this is not required. struct pl_vk_inst { VkInstance instance; uint64_t priv; }; struct pl_vk_inst_params { // If set, enable the debugging and validation layers. bool debug; // Enables extra instance extensions. Instance creation will fail if these // extensions are not all supported. The user may use this to enable e.g. // windowing system integration. const char **extensions; int num_extensions; }; extern const struct pl_vk_inst_params pl_vk_inst_default_params; // Helper function to simplify instance creation. The user could also bypass // these helpers and do it manually, but this function is provided as a // convenience. It also sets up a debug callback which forwards all vulkan // messages to the `pl_context` log callback. const struct pl_vk_inst *pl_vk_inst_create(struct pl_context *ctx, const struct pl_vk_inst_params *params); void pl_vk_inst_destroy(const struct pl_vk_inst **inst); // Structure representing the actual vulkan device and associated GPU instance struct pl_vulkan { const struct pl_gpu *gpu; void *priv; // The vulkan objects in use. The user may use this for their own purposes, // but please note that the lifetime is tied to the lifetime of the // pl_vulkan object, and must not be destroyed by the user. Note that the // created vulkan device may have any number of queues and queue family // assignments; so using it for queue submission commands is ill-advised. VkInstance instance; VkPhysicalDevice phys_device; VkDevice device; }; struct pl_vulkan_params { // The vulkan instance. 
Optional, if NULL then libplacebo will internally // create a VkInstance with no extra extensions or layers - but note that // this is not useful except for offline rendering. VkInstance instance; // When choosing the device, rule out all devices that don't support // presenting to this surface. When creating a device, enable all extensions // needed to ensure we can present to this surface. Optional. Only legal // when specifying an existing VkInstance to use. VkSurfaceKHR surface; // --- Physical device selection options // The vulkan physical device. May be set by the caller to indicate the // physical device to use. Otherwise, libplacebo will pick the "best" // available GPU, based on the advertised device type. (i.e., it will // prefer discrete GPUs over integrated GPUs). Only legal when specifying // an existing VkInstance to use. VkPhysicalDevice device; // When choosing the device, only choose a device with this exact name. // This overrides `allow_software`. No effect if `device` is set. Note: A // list of devices and their names are logged at level PL_LOG_INFO. const char *device_name; // When choosing the device, controls whether or not to also allow software // GPUs. No effect if `device` or `device_name` are set. bool allow_software; // --- Logical device creation options // Controls whether or not to allow asynchronous transfers, using transfer // queue families, if supported by the device. This can be significantly // faster and more power efficient, and also allows streaming uploads in // parallel with rendering commands. Enabled by default. bool async_transfer; // Controls whether or not to allow asynchronous compute, using dedicated // compute queue families, if supported by the device. On some devices, // these can allow the GPU to schedule compute shaders in parallel with // fragment shaders. Enabled by default. bool async_compute; // Limits the number of queues to request. If left as 0, this will enable // as many queues as the device supports. Multiple queues can result in // improved efficiency when submitting multiple commands that can entirely // or partially execute in parallel. Defaults to 1, since using more queues // can actually decrease performance. int queue_count; // Enables extra device extensions. Device creation will fail if these // extensions are not all supported. The user may use this to enable e.g. // interop extensions. const char **extensions; int num_extensions; }; // Default/recommended parameters. Should generally be safe and efficient. extern const struct pl_vulkan_params pl_vulkan_default_params; // Creates a new vulkan device based on the given parameters and initializes // a new GPU. This function will internally initialize a VkDevice. There is // currently no way to share a vulkan device with the caller. If `params` is // left as NULL, it defaults to &pl_vulkan_default_params. const struct pl_vulkan *pl_vulkan_create(struct pl_context *ctx, const struct pl_vulkan_params *params); // Destroys the vulkan device and all associated objects, except for the // VkInstance provided by the user. // // Note that all resources allocated from this vulkan object (e.g. via the // `vk->ra` or using `pl_vulkan_create_swapchain`) *must* be explicitly // destroyed by the user before calling this. void pl_vulkan_destroy(const struct pl_vulkan **vk); struct pl_vulkan_swapchain_params { // The surface to use for rendering. Required, the user is in charge of // creating this. Must belong to the same VkInstance as `vk->instance`. 
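// For example (a rough sketch, assuming SDL2 >= 2.0.6 and a window created
// with the SDL_WINDOW_VULKAN flag; `window` and `inst` are hypothetical
// handles owned by the caller):
//
//   VkSurfaceKHR surf;
//   if (!SDL_Vulkan_CreateSurface(window, inst->instance, &surf))
//       /* handle the error */;
//
// The instance extensions reported by SDL_Vulkan_GetInstanceExtensions can
// be passed via `pl_vk_inst_params.extensions` to make this work.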
VkSurfaceKHR surface; // The image format and colorspace we should be using. Optional, if left // as {0}, libplacebo will pick the best surface format based on what the // GPU/surface seems to support. VkSurfaceFormatKHR surface_format; // The preferred presentation mode. See the vulkan documentation for more // information about these. If the device/surface combination does not // support this mode, libplacebo will fall back to VK_PRESENT_MODE_FIFO_KHR. // // Warning: Leaving this zero-initialized is the same as having specified // VK_PRESENT_MODE_IMMEDIATE_KHR, which is probably not what the user // wants! VkPresentModeKHR present_mode; // Allow up to N in-flight frames. This essentially controls how many // rendering commands may be queued up at the same time. See the // documentation for `pl_swapchain_get_latency` for more information. For // vulkan specifically, we are only able to wait until the GPU has finished // rendering a frame - we are unable to wait until the display has actually // finished displaying it. So this only provides a rough guideline. // Optional, defaults to 3. int swapchain_depth; }; // Creates a new vulkan swapchain based on an existing VkSurfaceKHR. Using this // function requires that the vulkan device was created with the // VK_KHR_swapchain extension. The easiest way of accomplishing this is to set // the `pl_vulkan_params.surface` explicitly at creation time. const struct pl_swapchain *pl_vulkan_create_swapchain(const struct pl_vulkan *vk, const struct pl_vulkan_swapchain_params *params); #endif // LIBPLACEBO_VULKAN_H_ libplacebo-0.4.0/src/meson.build000066400000000000000000000075641324021332500165470ustar00rootroot00000000000000majorver = '0' apiver = '4' fixver = '0' version = majorver + '.' + apiver + '.' + fixver # Build options mostly taken from mpv build_opts = [ '-D_ISOC99_SOURCE', '-D_GNU_SOURCE', '-fvisibility=hidden', # Warnings '-Wall', '-Wundef', '-Wmissing-prototypes', '-Wshadow', '-Wparentheses', '-Wpointer-arith', '-Wno-pointer-sign', ] cc = meson.get_compiler('c') # clang's version of -Wmissing-braces rejects the common {0} initializers if cc.get_id() == 'clang' build_opts += ['-Wno-missing-braces'] endif # Global dependencies build_deps = [ dependency('threads'), cc.find_library('m', required: false), ] vulkan = dependency('vulkan', version: '>=1.0.42', required: false) # Source files sources = [ 'colorspace.c', 'common.c', 'context.c', 'dither.c', 'dispatch.c', 'filters.c', 'gpu.c', 'renderer.c', 'shaders.c', 'shaders/colorspace.c', 'shaders/sampling.c', 'spirv.c', 'swapchain.c', 'utils/upload.c', # Helpers ported from mpv or other projects 'bstr/bstr.c', 'bstr/format.c', '3rdparty/siphash.c', 'ta/ta.c', 'ta/ta_utils.c', 'ta/talloc.c', ] tests = [ 'context.c', 'colorspace.c', 'dither.c', 'filters.c', 'utils.c', ] # Optional components, in the following format: # [ name, dependency, extra_sources, extra_tests ] components = [ [ 'shaderc', cc.find_library('shaderc_shared', required: false), 'spirv_shaderc.c', ], [ 'vulkan', vulkan, [ 'vulkan/command.c', 'vulkan/context.c', 'vulkan/formats.c', 'vulkan/gpu.c', 'vulkan/malloc.c', 'vulkan/swapchain.c', 'vulkan/utils.c', ], 'vulkan.c' ] ] # Configuration conf = configuration_data() conf.set('majorver', majorver) conf.set('apiver', apiver) conf.set('fixver', fixver) conf.set_quoted('version', 'v' + version) ## Update PL_VERSION with `git describe` information if available git = find_program('git', required: false) if git.found() gitdesc = run_command(git, 'describe') if gitdesc.returncode() == 0 
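    # Prefer the exact `git describe` output (e.g. tag + commit offset) over
    # the static version string when building from a git checkout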
conf.set_quoted('version', gitdesc.stdout().strip()) endif endif # Build process defs = '' foreach c : components name = c[0] dep = c[1] opt = get_option(name) has_dep = opt == 'false' ? false : dep.found() if opt == 'true' and not has_dep error(('Dependency \'@0@\', marked as required, is not found. To build ' + 'without support for @0@, use -D@0@=false.').format(name)) endif pretty = name.underscorify().to_upper() defs += '#define PL_HAVE_@0@ @1@\n'.format(pretty, has_dep ? 1 : 0) if has_dep build_deps += dep if (c.length() > 2) sources += c[2] endif if (c.length() > 3) tests += c[3] endif endif endforeach conf.set('extra_defs', defs) configure_file( input: 'config.h.in', output: 'config.h', install_dir: 'include/libplacebo', configuration: conf, ) add_project_arguments(build_opts, language: 'c') inc = include_directories('./include') lib = library('placebo', sources, install: true, dependencies: build_deps, soversion: apiver, include_directories: inc, ) # Install process install_subdir('include/libplacebo', install_dir: get_option('includedir')) pkg = import('pkgconfig') pkg.generate( name: meson.project_name(), description: 'Reusable library for GPU-accelerated video/image rendering', libraries: lib, version: version, ) # Tests tdeps = [ declare_dependency(link_with: lib) ] if get_option('tests') foreach t : tests e = executable('test.' + t, 'tests/' + t, dependencies: build_deps + tdeps, include_directories: inc ) test(t, e) endforeach endif if get_option('bench') if not vulkan.found() error('Compiling the benchmark suite requires vulkan support!') endif bench = executable('bench', 'tests/bench.c', dependencies: build_deps + tdeps, include_directories: inc ) test('benchmark', bench, is_parallel: false, timeout: 600) endif libplacebo-0.4.0/src/osdep/000077500000000000000000000000001324021332500155035ustar00rootroot00000000000000libplacebo-0.4.0/src/osdep/compiler.h000066400000000000000000000010051324021332500174620ustar00rootroot00000000000000// Copyright (c) 2017 mpv developers #pragma once #define MP_EXPAND_ARGS(...) __VA_ARGS__ #ifdef __GNUC__ #define PRINTF_ATTRIBUTE(a1, a2) __attribute__ ((format(printf, a1, a2))) #define MP_NORETURN __attribute__((noreturn)) #else #define PRINTF_ATTRIBUTE(a1, a2) #define MP_NORETURN #endif // Broken crap with __USE_MINGW_ANSI_STDIO #if defined(__MINGW32__) && defined(__GNUC__) && !defined(__clang__) #undef PRINTF_ATTRIBUTE #define PRINTF_ATTRIBUTE(a1, a2) __attribute__ ((format (gnu_printf, a1, a2))) #endif libplacebo-0.4.0/src/pl_assert.h000066400000000000000000000024161324021332500165410ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include #include #ifndef NDEBUG # define pl_assert assert #else # define pl_assert(expr) \ do { \ if (!(expr)) { \ fprintf(stderr, "Assertion failed: %s in %s:%d\n", \ #expr, __FILE__, __LINE__); \ abort(); \ } \ } while (0) #endif libplacebo-0.4.0/src/renderer.c000066400000000000000000000710331324021332500163470ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "common.h" #include "shaders.h" enum { // The scalers for each plane are set up to be just the index itself SCALER_PLANE0 = 0, SCALER_PLANE1 = 1, SCALER_PLANE2 = 2, SCALER_PLANE3 = 3, SCALER_MAIN, SCALER_COUNT, }; // Canonical plane order aliases enum { PLANE_R = 0, PLANE_G = 1, PLANE_B = 2, PLANE_A = 3, PLANE_COUNT, // aliases for other systems PLANE_Y = PLANE_R, PLANE_CB = PLANE_G, PLANE_CR = PLANE_B, PLANE_CIEX = PLANE_R, PLANE_CIEY = PLANE_G, PLANE_CIEZ = PLANE_B, }; struct sampler { struct pl_shader_obj *upscaler_state; struct pl_shader_obj *downscaler_state; const struct pl_tex *sep_fbo_up; const struct pl_tex *sep_fbo_down; }; struct pl_renderer { const struct pl_gpu *gpu; struct pl_context *ctx; struct pl_dispatch *dp; // Texture format to use for intermediate textures const struct pl_fmt *fbofmt; // Cached feature checks (inverted) bool disable_compute; // disable the use of compute shaders bool disable_sampling; // disable use of advanced scalers bool disable_debanding; // disable the use of debanding shaders bool disable_linear_hdr; // disable linear scaling for HDR signals bool disable_linear_sdr; // disable linear scaling for SDR signals bool disable_blending; // disable blending for the target/fbofmt bool disable_overlay; // disable rendering overlays // Shader resource objects and intermediate textures (FBOs) struct pl_shader_obj *peak_detect_state; struct pl_shader_obj *dither_state; const struct pl_tex *main_scale_fbo; const struct pl_tex *deband_fbos[PLANE_COUNT]; struct sampler samplers[SCALER_COUNT]; struct sampler *osd_samplers; int num_osd_samplers; }; static void find_fbo_format(struct pl_renderer *rr) { struct { enum pl_fmt_type type; int depth; enum pl_fmt_caps caps; } configs[] = { // Prefer floating point formats first {PL_FMT_FLOAT, 16, PL_FMT_CAP_LINEAR}, {PL_FMT_FLOAT, 16, PL_FMT_CAP_SAMPLEABLE}, // Otherwise, fall back to unorm/snorm, preferring linearly sampleable {PL_FMT_UNORM, 16, PL_FMT_CAP_LINEAR}, {PL_FMT_SNORM, 16, PL_FMT_CAP_LINEAR}, {PL_FMT_UNORM, 16, PL_FMT_CAP_SAMPLEABLE}, {PL_FMT_SNORM, 16, PL_FMT_CAP_SAMPLEABLE}, // As a final fallback, allow 8-bit FBO formats (for UNORM only) {PL_FMT_UNORM, 8, PL_FMT_CAP_LINEAR}, {PL_FMT_UNORM, 8, PL_FMT_CAP_SAMPLEABLE}, }; for (int i = 0; i < PL_ARRAY_SIZE(configs); i++) { const struct pl_fmt *fmt; fmt = pl_find_fmt(rr->gpu, configs[i].type, 4, configs[i].depth, 0, configs[i].caps | PL_FMT_CAP_RENDERABLE); if (fmt) { rr->fbofmt = fmt; break; } } if (!rr->fbofmt) { PL_WARN(rr, "Found no 
renderable FBO format! Most features disabled"); return; } if (!(rr->fbofmt->caps & PL_FMT_CAP_STORABLE)) { PL_INFO(rr, "Found no storable FBO format; compute shaders disabled"); rr->disable_compute = true; } if (rr->fbofmt->type != PL_FMT_FLOAT) { PL_INFO(rr, "Found no floating point FBO format; linear light " "processing disabled for HDR material"); rr->disable_linear_hdr = true; } if (rr->fbofmt->component_depth[0] < 16) { PL_WARN(rr, "FBO format precision low (<16 bit); linear light " "processing disabled"); rr->disable_linear_sdr = true; } } struct pl_renderer *pl_renderer_create(struct pl_context *ctx, const struct pl_gpu *gpu) { struct pl_renderer *rr = talloc_ptrtype(NULL, rr); *rr = (struct pl_renderer) { .gpu = gpu, .ctx = ctx, .dp = pl_dispatch_create(ctx, gpu), }; assert(rr->dp); find_fbo_format(rr); return rr; } static void sampler_destroy(struct pl_renderer *rr, struct sampler *sampler) { pl_shader_obj_destroy(&sampler->upscaler_state); pl_shader_obj_destroy(&sampler->downscaler_state); pl_tex_destroy(rr->gpu, &sampler->sep_fbo_up); pl_tex_destroy(rr->gpu, &sampler->sep_fbo_down); } void pl_renderer_destroy(struct pl_renderer **p_rr) { struct pl_renderer *rr = *p_rr; if (!rr) return; // Free all intermediate FBOs pl_tex_destroy(rr->gpu, &rr->main_scale_fbo); for (int i = 0; i < PL_ARRAY_SIZE(rr->deband_fbos); i++) pl_tex_destroy(rr->gpu, &rr->deband_fbos[i]); // Free all shader resource objects pl_shader_obj_destroy(&rr->peak_detect_state); pl_shader_obj_destroy(&rr->dither_state); // Free all samplers for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers); i++) sampler_destroy(rr, &rr->samplers[i]); for (int i = 0; i < rr->num_osd_samplers; i++) sampler_destroy(rr, &rr->osd_samplers[i]); pl_dispatch_destroy(&rr->dp); TA_FREEP(p_rr); } void pl_renderer_flush_cache(struct pl_renderer *rr) { // TODO } const struct pl_render_params pl_render_default_params = { .upscaler = &pl_filter_spline36, .downscaler = &pl_filter_mitchell, .frame_mixer = NULL, .deband_params = &pl_deband_default_params, .sigmoid_params = &pl_sigmoid_default_params, .color_map_params = &pl_color_map_default_params, .dither_params = &pl_dither_default_params, }; // Represents a "in-flight" image, which is a shader that's in the process of // producing some sort of image struct img { struct pl_shader *sh; int w, h; // Current effective source area, will be sampled by the main scaler struct pl_rect2df rect; // The current effective colorspace struct pl_color_repr repr; struct pl_color_space color; int comps; }; static const struct pl_tex *finalize_img(struct pl_renderer *rr, struct img *img, const struct pl_fmt *fmt, const struct pl_tex **tex) { bool ok = pl_tex_recreate(rr->gpu, tex, &(struct pl_tex_params) { .w = img->w, .h = img->h, .format = fmt, .sampleable = true, .renderable = true, // Just enable what we can .storable = !!(fmt->caps & PL_FMT_CAP_STORABLE), .sample_mode = (fmt->caps & PL_FMT_CAP_LINEAR) ? PL_TEX_SAMPLE_LINEAR : PL_TEX_SAMPLE_NEAREST, }); if (!ok) { PL_ERR(rr, "Failed creating FBO texture! Disabling advanced rendering.."); rr->fbofmt = NULL; pl_dispatch_abort(rr->dp, &img->sh); return NULL; } if (!pl_dispatch_finish(rr->dp, &img->sh, *tex, NULL, NULL)) { PL_ERR(rr, "Failed dispatching intermediate pass!"); return NULL; } return *tex; } struct pass_state { // Represents the "current" image which we're in the process of rendering. // This is initially set by pass_read_image, and all of the subsequent // rendering steps will mutate this in-place. 
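    // (Concretely: pass_read_image creates it, pass_scale_main replaces it
    // with a scaled version, and pass_output_target dispatches the final
    // shader to the target FBO.)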
struct img cur_img; }; static void dispatch_sampler(struct pl_renderer *rr, struct pl_shader *sh, float rx, float ry, struct sampler *sampler, const struct pl_render_params *params, const struct pl_sample_src *src) { bool is_linear = src->tex->params.sample_mode == PL_TEX_SAMPLE_LINEAR; bool no_samplers = !params->upscaler && !params->downscaler; if (!rr->fbofmt || rr->disable_sampling || !sampler || no_samplers) goto fallback; const struct pl_filter_config *config = NULL; struct pl_shader_obj **lut; const struct pl_tex **sep_fbo; rx = fabs(rx); ry = fabs(ry); if (rx < 1.0 - 1e-6 || ry < 1.0 - 1e-6) { config = params->downscaler; lut = &sampler->downscaler_state; sep_fbo = &sampler->sep_fbo_down; } else if (rx > 1.0 + 1e-6 || ry > 1.0 + 1e-6) { config = params->upscaler; lut = &sampler->upscaler_state; sep_fbo = &sampler->sep_fbo_up; } else { // no scaling goto direct; } if (!config) goto fallback; // Try using faster replacements for GPU built-in scalers bool can_fast = config == params->upscaler || params->skip_anti_aliasing; if (can_fast && !params->disable_builtin_scalers) { if (is_linear && config == &pl_filter_bicubic) goto fallback; // the bicubic check will succeed if (is_linear && config == &pl_filter_triangle) goto direct; if (!is_linear && config == &pl_filter_box) goto direct; } struct pl_sample_filter_params fparams = { .filter = *config, .lut_entries = params->lut_entries, .cutoff = params->polar_cutoff, .antiring = params->antiringing_strength, .no_compute = rr->disable_compute, .no_widening = params->skip_anti_aliasing, .lut = lut, }; bool ok; if (config->polar) { ok = pl_shader_sample_polar(sh, src, &fparams); } else { struct pl_shader *tsh = pl_dispatch_begin(rr->dp); ok = pl_shader_sample_ortho(tsh, PL_SEP_VERT, src, &fparams); if (!ok) { pl_dispatch_abort(rr->dp, &tsh); goto done; } struct img img = { .sh = tsh, .w = src->tex->params.w, .h = src->new_h, }; struct pl_sample_src src2 = *src; src2.tex = finalize_img(rr, &img, rr->fbofmt, sep_fbo); ok = pl_shader_sample_ortho(sh, PL_SEP_HORIZ, &src2, &fparams); } done: if (!ok) { PL_ERR(rr, "Failed dispatching scaler.. disabling"); rr->disable_sampling = true; goto fallback; } return; fallback: // Use bicubic sampling if supported if (rr->fbofmt && is_linear) { pl_shader_sample_bicubic(sh, src); return; } direct: // If all else fails, fall back to bilinear/nearest pl_shader_sample_direct(sh, src); } static void draw_overlays(struct pl_renderer *rr, const struct pl_tex *fbo, const struct pl_overlay *overlays, int num, struct pl_color_space color, bool use_sigmoid, struct pl_transform2x2 *scale, const struct pl_render_params *params) { if (num <= 0 || rr->disable_overlay) return; enum pl_fmt_caps caps = fbo->params.format->caps; if (!rr->disable_blending && !(caps & PL_FMT_CAP_BLENDABLE)) { PL_WARN(rr, "Trying to draw an overlay to a non-blendable target. 
" "Alpha blending is disabled, results may be incorrect!"); rr->disable_blending = true; } while (num > rr->num_osd_samplers) { TARRAY_APPEND(rr, rr->osd_samplers, rr->num_osd_samplers, (struct sampler) {0}); } for (int n = 0; n < num; n++) { const struct pl_overlay *ol = &overlays[n]; const struct pl_plane *plane = &ol->plane; const struct pl_tex *tex = plane->texture; struct pl_rect2d rect = ol->rect; if (scale) { float v0[2] = { rect.x0, rect.y0 }; float v1[2] = { rect.x1, rect.y1 }; pl_transform2x2_apply(scale, v0); pl_transform2x2_apply(scale, v1); rect = (struct pl_rect2d) { v0[0], v0[1], v1[0], v1[1] }; } struct pl_sample_src src = { .tex = tex, .components = ol->mode == PL_OVERLAY_MONOCHROME ? 1 : plane->components, .new_w = abs(pl_rect_w(rect)), .new_h = abs(pl_rect_h(rect)), .rect = { -plane->shift_x, -plane->shift_y, tex->params.w - plane->shift_x, tex->params.h - plane->shift_y, }, }; float rx = (float) src.new_w / src.tex->params.w, ry = (float) src.new_h / src.tex->params.h; struct sampler *sampler = &rr->osd_samplers[n]; if (params->disable_overlay_sampling) sampler = NULL; struct pl_shader *sh = pl_dispatch_begin(rr->dp); dispatch_sampler(rr, sh, rx, ry, sampler, params, &src); GLSL("vec4 osd_color;\n"); for (int c = 0; c < src.components; c++) { if (plane->component_mapping[c] < 0) continue; GLSL("osd_color[%d] = color[%d];\n", plane->component_mapping[c], tex->params.format->sample_order[c]); } switch (ol->mode) { case PL_OVERLAY_NORMAL: GLSL("color = osd_color;\n"); break; case PL_OVERLAY_MONOCHROME: GLSL("color.a = osd_color[0];\n"); GLSL("color.rgb = %s;\n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec3("base_color"), .data = &ol->base_color, .dynamic = true, })); break; default: abort(); } struct pl_color_repr repr = ol->repr; pl_shader_decode_color(sh, &repr, NULL); pl_shader_color_map(sh, params->color_map_params, ol->color, color, NULL, false); if (use_sigmoid) pl_shader_sigmoidize(sh, params->sigmoid_params); static const struct pl_blend_params blend_params = { .src_rgb = PL_BLEND_SRC_ALPHA, .dst_rgb = PL_BLEND_ONE_MINUS_SRC_ALPHA, .src_alpha = PL_BLEND_ONE, .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA, }; const struct pl_blend_params *blend = &blend_params; if (rr->disable_blending) blend = NULL; if (!pl_dispatch_finish(rr->dp, &sh, fbo, &rect, blend)) { PL_ERR(rr, "Failed rendering overlay texture!"); rr->disable_overlay = true; return; } } } static void deband_plane(struct pl_renderer *rr, struct pl_plane *plane, const struct pl_tex **fbo, const struct pl_render_params *params) { if (!rr->fbofmt || rr->disable_debanding || !params->deband_params) return; const struct pl_tex *tex = plane->texture; if (tex->params.sample_mode != PL_TEX_SAMPLE_LINEAR) { PL_WARN(rr, "Debanding requires uploaded textures to be linearly " "sampleable (params.sample_mode = PL_TEX_SAMPLE_LINEAR)! " "Disabling debanding.."); rr->disable_debanding = true; return; } struct pl_shader *sh = pl_dispatch_begin(rr->dp); pl_shader_deband(sh, tex, params->deband_params); struct img img = { .sh = sh, .w = tex->params.w, .h = tex->params.h, }; const struct pl_tex *new = finalize_img(rr, &img, rr->fbofmt, fbo); if (!new) { PL_ERR(rr, "Failed dispatching debanding shader.. disabling debanding!"); rr->disable_debanding = true; return; } plane->texture = new; } // This scales and merges all of the source images, and initializes the cur_img. 
static bool pass_read_image(struct pl_renderer *rr, struct pass_state *pass, const struct pl_image *image, const struct pl_render_params *params) { struct pl_shader *sh = pl_dispatch_begin(rr->dp); sh_require(sh, PL_SHADER_SIG_NONE, 0, 0); // Initialize the color to black const char *neutral = "0.0, 0.0, 0.0"; if (pl_color_system_is_ycbcr_like(image->repr.sys)) neutral = "0.0, 0.5, 0.5"; GLSL("vec4 color = vec4(%s, 1.0); \n" "// pass_read_image \n" "{ \n" "vec4 tmp; \n", neutral); // First of all, we have to pick a "reference" plane for alignment. // This should ideally be the plane that most closely matches the target // image size struct pl_plane planes[4]; const struct pl_plane *refplane = NULL; // points to one of `planes` int best_diff = 0, best_off = 0; pl_assert(image->num_planes < PLANE_COUNT); for (int i = 0; i < image->num_planes; i++) { struct pl_plane *plane = &planes[i]; *plane = image->planes[i]; const struct pl_tex *tex = plane->texture; int diff = PL_MAX(abs(tex->params.w - image->width), abs(tex->params.h - image->height)); int off = PL_MAX(plane->shift_x, plane->shift_y); if (!refplane || diff < best_diff || (diff == best_diff && off < best_off)) { refplane = plane; best_diff = diff; best_off = off; } } if (!refplane) { PL_ERR(rr, "Image contains no planes?"); return false; } float ref_w = refplane->texture->params.w, ref_h = refplane->texture->params.h; // Round the src_rect up to the nearest integer size struct pl_rect2d rc = { floorf(image->src_rect.x0), floorf(image->src_rect.y0), ceilf(image->src_rect.x1), ceilf(image->src_rect.y1), }; int target_w = pl_rect_w(rc), target_h = pl_rect_h(rc); pl_assert(target_w > 0 && target_h > 0); bool has_alpha = false; for (int i = 0; i < image->num_planes; i++) { struct pl_shader *psh = pl_dispatch_begin(rr->dp); struct pl_plane *plane = &planes[i]; deband_plane(rr, plane, &rr->deband_fbos[i], params); // Compute the source shift/scale relative to the reference size float pw = plane->texture->params.w, ph = plane->texture->params.h, rx = ref_w / pw, ry = ref_h / ph, sx = plane->shift_x - refplane->shift_x, sy = plane->shift_y - refplane->shift_y; // Only accept integer scaling ratios. This accounts for the fact // that fractionally subsampled planes get rounded up to the nearest // integer size, which we want to discard. float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx), rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry); struct pl_sample_src src = { .tex = plane->texture, .components = plane->components, .new_w = target_w, .new_h = target_h, .rect = { rc.x0 / rrx - sx / rx, rc.y0 / rry - sy / ry, rc.x1 / rrx - sx / rx, rc.y1 / rry - sy / ry, }, }; // FIXME: in theory, we could reuse the debanding result from // `deband_plane` if available, instead of having to dispatch a no-op // shader, for trivial sampling cases (no scaling or shifting) dispatch_sampler(rr, psh, rrx, rry, &rr->samplers[i], params, &src); ident_t sub = sh_subpass(sh, psh); if (!sub) { PL_ERR(sh, "Failed dispatching subpass for plane.. 
disabling " "scalers"); rr->disable_sampling = true; pl_dispatch_abort(rr->dp, &psh); pl_dispatch_abort(rr->dp, &sh); // FIXME: instead of erroring here, instead render out to a cache // FBO and sample from that instead return false; } GLSL("tmp = %s();\n", sub); for (int c = 0; c < src.components; c++) { if (plane->component_mapping[c] < 0) continue; GLSL("color[%d] = tmp[%d];\n", plane->component_mapping[c], plane->texture->params.format->sample_order[c]); has_alpha |= plane->component_mapping[c] == PLANE_A; } // we don't need it anymore pl_dispatch_abort(rr->dp, &psh); } float basex = image->src_rect.x0 - rc.x0 - refplane->shift_x, basey = image->src_rect.y0 - rc.y0 - refplane->shift_y; pass->cur_img = (struct img) { .sh = sh, .w = target_w, .h = target_h, .repr = image->repr, .color = image->color, .comps = has_alpha ? 4 : 3, .rect = { basex, basey, basex + pl_rect_w(image->src_rect), basey + pl_rect_h(image->src_rect), }, }; // Convert the image colorspace pl_shader_decode_color(sh, &pass->cur_img.repr, params->color_adjustment); GLSL("}\n"); return true; } static bool pass_scale_main(struct pl_renderer *rr, struct pass_state *pass, const struct pl_image *image, const struct pl_render_target *target, const struct pl_render_params *params) { struct img *img = &pass->cur_img; int target_w = abs(pl_rect_w(target->dst_rect)), target_h = abs(pl_rect_h(target->dst_rect)); float src_w = pl_rect_w(image->src_rect), src_h = pl_rect_h(image->src_rect); pl_assert(src_w > 0 && src_h > 0); float rx = target_w / src_w, ry = target_h / src_h; if (!rr->fbofmt) { PL_TRACE(rr, "Skipping main scaler (no FBOs)"); return true; } if ((!params->upscaler && !params->downscaler) || rr->disable_sampling) { PL_TRACE(rr, "Skipping main scaler (no samplers)"); return true; } bool downscaling = rx < 1.0 - 1e-6 || ry < 1.0 - 1e-6; bool upscaling = !downscaling && (rx > 1.0 + 1e-6 || ry > 1.0 + 1e-6); bool need_osd = image->num_overlays > 0; if (!downscaling && !upscaling && !need_osd) { PL_TRACE(rr, "Skipping main scaler (would be no-op)"); return true; } bool use_sigmoid = upscaling && params->sigmoid_params; bool use_linear = use_sigmoid || downscaling; // Hard-disable both sigmoidization and linearization when requested if (params->disable_linear_scaling) use_sigmoid = use_linear = false; // Avoid sigmoidization for HDR content because it clips to [0,1] if (pl_color_transfer_is_hdr(img->color.transfer)) use_sigmoid = false; if (use_linear) { pl_shader_linearize(img->sh, img->color.transfer); img->color.transfer = PL_COLOR_TRC_LINEAR; } if (use_sigmoid) pl_shader_sigmoidize(img->sh, params->sigmoid_params); struct pl_sample_src src = { .tex = finalize_img(rr, img, rr->fbofmt, &rr->main_scale_fbo), .components = img->comps, .new_w = target_w, .new_h = target_h, .rect = img->rect, }; if (!src.tex) return false; // Draw overlay on top of the intermediate image if needed draw_overlays(rr, src.tex, image->overlays, image->num_overlays, img->color, use_sigmoid, NULL, params); struct pl_shader *sh = pl_dispatch_begin(rr->dp); dispatch_sampler(rr, sh, rx, ry, &rr->samplers[SCALER_MAIN], params, &src); pass->cur_img = (struct img) { .sh = sh, .w = target_w, .h = target_h, .repr = img->repr, .color = img->color, .comps = img->comps, }; if (use_sigmoid) pl_shader_unsigmoidize(sh, params->sigmoid_params); return true; } static bool pass_output_target(struct pl_renderer *rr, struct pass_state *pass, const struct pl_render_target *target, const struct pl_render_params *params) { const struct pl_tex *fbo = target->fbo; // Color 
management struct pl_shader *sh = pass->cur_img.sh; pl_shader_color_map(sh, params->color_map_params, pass->cur_img.color, target->color, &rr->peak_detect_state, false); pl_shader_encode_color(sh, &target->repr); // FIXME: Technically we should try dithering before bit shifting if we're // going to be encoding to a low bit depth, since the caller might end up // discarding the extra bits. Ideally, we would pull the `bit_shift` out // of the `target->repr` and apply it separately after dithering. if (params->dither_params) { // Just assume the first component's depth is canonical. This works // in practice, since for cases like rgb565 we want to use the lower // depth anyway. Plus, every format has at least one component. int fmt_depth = fbo->params.format->component_depth[0]; int depth = PL_DEF(target->repr.bits.sample_depth, fmt_depth); // Ignore dithering for >16-bit FBOs, since it's pretty pointless if (depth <= 16) pl_shader_dither(sh, depth, &rr->dither_state, params->dither_params); } bool is_comp = pl_shader_is_compute(sh); if (is_comp && !fbo->params.storable) { // TODO: force caching abort(); } pl_assert(fbo->params.renderable); return pl_dispatch_finish(rr->dp, &pass->cur_img.sh, fbo, &target->dst_rect, NULL); } static void fix_rects(struct pl_image *image, struct pl_render_target *target) { pl_assert(image->width && image->height); // Initialize the rects to the full size if missing if ((!image->src_rect.x0 && !image->src_rect.x1) || (!image->src_rect.y0 && !image->src_rect.y1)) { image->src_rect = (struct pl_rect2df) { 0, 0, image->width, image->height, }; } if ((!target->dst_rect.x0 && !target->dst_rect.x1) || (!target->dst_rect.y0 && !target->dst_rect.y1)) { target->dst_rect = (struct pl_rect2d) { 0, 0, target->fbo->params.w, target->fbo->params.h, }; } // We always want to prefer flipping in the dst_rect over flipping in // the src_rect. They're functionally equivalent either way. 
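    // (e.g. if the caller mirrors horizontally by passing src_rect.x0 >
    // src_rect.x1, the src x coordinates are swapped back into normal order
    // here and the dst_rect x coordinates are flipped instead.)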
if (image->src_rect.x0 > image->src_rect.x1) { PL_SWAP(image->src_rect.x0, image->src_rect.x1); PL_SWAP(target->dst_rect.x0, target->dst_rect.x1); } if (image->src_rect.y0 > image->src_rect.y1) { PL_SWAP(image->src_rect.y0, image->src_rect.y1); PL_SWAP(target->dst_rect.y0, target->dst_rect.y1); } } bool pl_render_image(struct pl_renderer *rr, const struct pl_image *pimage, const struct pl_render_target *ptarget, const struct pl_render_params *params) { params = PL_DEF(params, &pl_render_default_params); struct pl_image image = *pimage; struct pl_render_target target = *ptarget; fix_rects(&image, &target); // TODO: output caching pl_dispatch_reset_frame(rr->dp); struct pass_state pass = {0}; if (!pass_read_image(rr, &pass, &image, params)) goto error; if (!pass_scale_main(rr, &pass, &image, &target, params)) goto error; if (!pass_output_target(rr, &pass, &target, params)) goto error; // If we don't have FBOs available, simulate the on-image overlays at // this stage if (image.num_overlays > 0 && !rr->fbofmt) { float rx = pl_rect_w(target.dst_rect) / pl_rect_w(image.src_rect), ry = pl_rect_h(target.dst_rect) / pl_rect_h(image.src_rect); struct pl_transform2x2 scale = { .mat = {{{ rx, 0.0 }, { 0.0, ry }}}, .c = { target.dst_rect.x0 - image.src_rect.x0 * rx, target.dst_rect.y0 - image.src_rect.y0 * ry }, }; draw_overlays(rr, target.fbo, image.overlays, image.num_overlays, target.color, false, &scale, params); } // Draw the final output overlays draw_overlays(rr, target.fbo, target.overlays, target.num_overlays, target.color, false, NULL, params); return true; error: pl_dispatch_abort(rr->dp, &pass.cur_img.sh); PL_ERR(rr, "Failed rendering image!"); return false; } void pl_render_target_from_swapchain(struct pl_render_target *out_target, const struct pl_swapchain_frame *frame) { const struct pl_tex *fbo = frame->fbo; *out_target = (struct pl_render_target) { .fbo = fbo, .dst_rect = { 0, 0, fbo->params.w, fbo->params.h }, .repr = frame->color_repr, .color = frame->color_space, }; } libplacebo-0.4.0/src/shaders.c000066400000000000000000000565271324021332500162050ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include "bstr/bstr.h" #include "common.h" #include "context.h" #include "shaders.h" struct pl_shader *pl_shader_alloc(struct pl_context *ctx, const struct pl_gpu *gpu, uint8_t ident, uint8_t index) { pl_assert(ctx); struct pl_shader *sh = talloc_ptrtype(ctx, sh); *sh = (struct pl_shader) { .ctx = ctx, .gpu = gpu, .mutable = true, .tmp = talloc_ref_new(ctx), .ident = ident, .index = index, }; return sh; } void pl_shader_free(struct pl_shader **psh) { struct pl_shader *sh = *psh; if (!sh) return; talloc_ref_deref(&sh->tmp); TA_FREEP(psh); } void pl_shader_reset(struct pl_shader *sh, uint8_t ident, uint8_t index) { struct pl_shader new = { .ctx = sh->ctx, .gpu = sh->gpu, .tmp = talloc_ref_new(sh->ctx), .mutable = true, .ident = ident, .index = index, // Preserve array allocations .res = { .variables = sh->res.variables, .descriptors = sh->res.descriptors, .vertex_attribs = sh->res.vertex_attribs, }, }; // Preserve buffer allocations for (int i = 0; i < PL_ARRAY_SIZE(new.buffers); i++) new.buffers[i] = (struct bstr) { .start = sh->buffers[i].start }; talloc_ref_deref(&sh->tmp); *sh = new; } bool sh_try_compute(struct pl_shader *sh, int bw, int bh, bool flex, size_t mem) { pl_assert(bw && bh); int *sh_bw = &sh->res.compute_group_size[0]; int *sh_bh = &sh->res.compute_group_size[1]; if (!sh->gpu || !(sh->gpu->caps & PL_GPU_CAP_COMPUTE)) { PL_TRACE(sh, "Disabling compute shader due to missing PL_GPU_CAP_COMPUTE"); return false; } if (sh->res.compute_shmem + mem > sh->gpu->limits.max_shmem_size) { PL_TRACE(sh, "Disabling compute shader due to insufficient shmem"); return false; } sh->res.compute_shmem += mem; // If the current shader is either not a compute shader, or we have no // choice but to override the metadata, always do so if (!sh->is_compute || (sh->flexible_work_groups && !flex)) { *sh_bw = bw; *sh_bh = bh; sh->is_compute = true; return true; } // If both shaders are flexible, pick the larger of the two if (sh->flexible_work_groups && flex) { *sh_bw = PL_MAX(*sh_bw, bw); *sh_bh = PL_MAX(*sh_bh, bh); return true; } // If the other shader is rigid but this is flexible, change nothing if (flex) return true; // If neither are flexible, make sure the parameters match pl_assert(!flex && !sh->flexible_work_groups); if (bw != *sh_bw || bh != *sh_bh) { PL_TRACE(sh, "Disabling compute shader due to incompatible group " "sizes %dx%d and %dx%d", *sh_bw, *sh_bh, bw, bh); sh->res.compute_shmem -= mem; return false; } return true; } bool pl_shader_is_compute(const struct pl_shader *sh) { return sh->is_compute; } bool pl_shader_output_size(const struct pl_shader *sh, int *w, int *h) { if (!sh->output_w || !sh->output_h) return false; *w = sh->output_w; *h = sh->output_h; return true; } uint64_t pl_shader_signature(const struct pl_shader *sh) { uint64_t res = 0; for (int i = 0; i < PL_ARRAY_SIZE(sh->buffers); i++) res ^= bstr_hash64(sh->buffers[i]); // FIXME: also hash in the configuration of the descriptors/variables return res; } ident_t sh_fresh(struct pl_shader *sh, const char *name) { return talloc_asprintf(sh->tmp, "_%s_%d_%u", PL_DEF(name, "var"), sh->fresh++, sh->ident); } ident_t sh_var(struct pl_shader *sh, struct pl_shader_var sv) { sv.var.name = sh_fresh(sh, sv.var.name); sv.data = talloc_memdup(sh->tmp, sv.data, pl_var_host_layout(0, &sv.var).size); TARRAY_APPEND(sh, sh->res.variables, sh->res.num_variables, sv); return (ident_t) sv.var.name; } ident_t sh_desc(struct pl_shader *sh, struct pl_shader_desc sd) { sd.desc.name = sh_fresh(sh, sd.desc.name); TARRAY_APPEND(sh, 
sh->res.descriptors, sh->res.num_descriptors, sd); return (ident_t) sd.desc.name; } ident_t sh_attr_vec2(struct pl_shader *sh, const char *name, const struct pl_rect2df *rc) { if (!sh->gpu) { PL_ERR(sh, "Failed adding vertex attr '%s': No GPU available!", name); return NULL; } const struct pl_fmt *fmt = pl_find_vertex_fmt(sh->gpu, PL_FMT_FLOAT, 2); if (!fmt) { PL_ERR(sh, "Failed adding vertex attr '%s': no vertex fmt!", name); return NULL; } float vals[4][2] = { { rc->x0, rc->y0 }, { rc->x1, rc->y0 }, { rc->x0, rc->y1 }, { rc->x1, rc->y1 }, }; float *data = talloc_memdup(sh->tmp, &vals[0][0], sizeof(vals)); struct pl_shader_va va = { .attr = { .name = sh_fresh(sh, name), .fmt = pl_find_vertex_fmt(sh->gpu, PL_FMT_FLOAT, 2), }, .data = { &data[0], &data[2], &data[4], &data[6] }, }; TARRAY_APPEND(sh, sh->res.vertex_attribs, sh->res.num_vertex_attribs, va); return (ident_t) va.attr.name; } ident_t sh_bind(struct pl_shader *sh, const struct pl_tex *tex, const char *name, const struct pl_rect2df *rect, ident_t *out_pos, ident_t *out_size, ident_t *out_pt) { if (!sh->gpu) { PL_ERR(sh, "Failed binding texture '%s': No GPU available!", name); return NULL; } if (pl_tex_params_dimension(tex->params) != 2 || !tex->params.sampleable) { PL_ERR(sh, "Failed binding texture '%s': incompatible params!", name); return NULL; } ident_t itex = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = name, .type = PL_DESC_SAMPLED_TEX, }, .object = tex, }); if (out_pos) { struct pl_rect2df full = { .x1 = tex->params.w, .y1 = tex->params.h, }; rect = PL_DEF(rect, &full); *out_pos = sh_attr_vec2(sh, "tex_coord", &(struct pl_rect2df) { .x0 = rect->x0 / tex->params.w, .y0 = rect->y0 / tex->params.h, .x1 = rect->x1 / tex->params.w, .y1 = rect->y1 / tex->params.h, }); } if (out_size) { *out_size = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_size"), .data = &(float[2]) {tex->params.w, tex->params.h}, }); } if (out_pt) { *out_pt = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_pt"), .data = &(float[2]) {1.0 / tex->params.w, 1.0 / tex->params.h}, }); } return itex; } void pl_shader_append(struct pl_shader *sh, enum pl_shader_buf buf, const char *fmt, ...) 
{ pl_assert(buf >= 0 && buf < SH_BUF_COUNT); va_list ap; va_start(ap, fmt); bstr_xappend_vasprintf_c(sh, &sh->buffers[buf], fmt, ap); va_end(ap); } static const char *outsigs[] = { [PL_SHADER_SIG_NONE] = "void", [PL_SHADER_SIG_COLOR] = "vec4", }; static const char *insigs[] = { [PL_SHADER_SIG_NONE] = "", [PL_SHADER_SIG_COLOR] = "vec4 color", }; static const char *retvals[] = { [PL_SHADER_SIG_NONE] = "", [PL_SHADER_SIG_COLOR] = "return color;", }; ident_t sh_subpass(struct pl_shader *sh, const struct pl_shader *sub) { pl_assert(sh->mutable); // Check for shader compatibility int res_w = PL_DEF(sh->output_w, sub->output_w), res_h = PL_DEF(sh->output_h, sub->output_h); if ((sub->output_w && res_w != sub->output_w) || (sub->output_h && res_h != sub->output_h)) { PL_ERR(sh, "Failed merging shaders: incompatible sizes: %dx%d and %dx%d", sh->output_w, sh->output_h, sub->output_w, sub->output_h); return NULL; } if (sub->is_compute) { int subw = sub->res.compute_group_size[0], subh = sub->res.compute_group_size[1]; bool flex = sub->flexible_work_groups; if (!sh_try_compute(sh, subw, subh, flex, sub->res.compute_shmem)) { PL_ERR(sh, "Failed merging shaders: incompatible block sizes or " "exceeded shared memory resource capabilities"); return NULL; } } sh->output_w = res_w; sh->output_h = res_h; // Append the prelude and header bstr_xappend(sh, &sh->buffers[SH_BUF_PRELUDE], sub->buffers[SH_BUF_PRELUDE]); bstr_xappend(sh, &sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_HEADER]); // Append the body as a new header function ident_t name = sh_fresh(sh, "sub"); GLSLH("%s %s(%s) {\n", outsigs[sub->res.output], name, insigs[sub->res.input]); bstr_xappend(sh, &sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_BODY]); GLSLH("%s }\n", retvals[sub->res.output]); // Copy over all of the descriptors etc. talloc_ref_attach(sh->tmp, sub->tmp); #define COPY(f) TARRAY_CONCAT(sh, sh->res.f, sh->res.num_##f, \ sub->res.f, sub->res.num_##f) COPY(variables); COPY(descriptors); COPY(vertex_attribs); #undef COPY return name; } // Finish the current shader body and return its function name static ident_t sh_split(struct pl_shader *sh) { pl_assert(sh->mutable); // Concatenate the body onto the head as a new function ident_t name = sh_fresh(sh, "main"); GLSLH("%s %s(%s) {\n", outsigs[sh->res.output], name, insigs[sh->res.input]); if (sh->buffers[SH_BUF_BODY].len) { bstr_xappend(sh, &sh->buffers[SH_BUF_HEADER], sh->buffers[SH_BUF_BODY]); sh->buffers[SH_BUF_BODY].len = 0; sh->buffers[SH_BUF_BODY].start[0] = '\0'; // for sanity / efficiency } GLSLH("%s }\n", retvals[sh->res.output]); return name; } const struct pl_shader_res *pl_shader_finalize(struct pl_shader *sh) { if (!sh->mutable) { PL_WARN(sh, "Attempted to finalize a shader twice?"); return &sh->res; } // Split the shader. 
This finalizes the body and adds it to the header sh->res.name = sh_split(sh); // Concatenate the header onto the prelude to form the final output struct bstr *glsl = &sh->buffers[SH_BUF_PRELUDE]; bstr_xappend(sh, glsl, sh->buffers[SH_BUF_HEADER]); // Update the result pointer and return sh->res.glsl = glsl->start; sh->mutable = false; return &sh->res; } bool sh_require(struct pl_shader *sh, enum pl_shader_sig insig, int w, int h) { if (!sh->mutable) { PL_ERR(sh, "Attempted to modify an immutable shader!"); return false; } if ((w && sh->output_w && sh->output_w != w) || (h && sh->output_h && sh->output_h != h)) { PL_ERR(sh, "Illegal sequence of shader operations: Incompatible " "output size requirements %dx%d and %dx%d", sh->output_w, sh->output_h, w, h); return false; } static const char *names[] = { [PL_SHADER_SIG_NONE] = "PL_SHADER_SIG_NONE", [PL_SHADER_SIG_COLOR] = "PL_SHADER_SIG_COLOR", }; // If we require an input, but there is none available - just get it from // the user by turning it into an explicit input signature. if (!sh->res.output && insig) { pl_assert(!sh->res.input); sh->res.input = insig; } else if (sh->res.output != insig) { PL_ERR(sh, "Illegal sequence of shader operations! Current output " "signature is '%s', but called operation expects '%s'!", names[sh->res.output], names[insig]); return false; } // All of our shaders end up returning a vec4 color sh->res.output = PL_SHADER_SIG_COLOR; sh->output_w = PL_DEF(sh->output_w, w); sh->output_h = PL_DEF(sh->output_h, h); return true; } void pl_shader_obj_destroy(struct pl_shader_obj **ptr) { struct pl_shader_obj *obj = *ptr; if (!obj) return; if (obj->uninit) obj->uninit(obj->gpu, obj->priv); *ptr = NULL; talloc_free(obj); } void *sh_require_obj(struct pl_shader *sh, struct pl_shader_obj **ptr, enum pl_shader_obj_type type, size_t priv_size, void (*uninit)(const struct pl_gpu *gpu, void *priv)) { if (!ptr) return NULL; struct pl_shader_obj *obj = *ptr; if (obj && obj->gpu != sh->gpu) { PL_ERR(sh, "Passed pl_shader_obj belongs to different GPU!"); return NULL; } if (obj && obj->type != type) { PL_ERR(sh, "Passed pl_shader_obj of wrong type! Shader objects must " "always be used with the same type of shader."); return NULL; } if (!obj) { obj = talloc_zero(NULL, struct pl_shader_obj); obj->gpu = sh->gpu; obj->type = type; obj->priv = talloc_zero_size(obj, priv_size); obj->uninit = uninit; } *ptr = obj; return obj->priv; } ident_t sh_prng(struct pl_shader *sh, bool temporal, ident_t *p_state) { // Initialize the PRNG. This is friendly for wide usage and returns in // a very pleasant-looking distribution across frames even if the difference // between input coordinates is very small. This is based on BlumBlumShub, // with some modifications for speed / aesthetics. // cf. 
https://briansharpe.wordpress.com/2011/10/01/gpu-texture-free-noise/ ident_t randfun = sh_fresh(sh, "random"), permute = sh_fresh(sh, "permute"); GLSLH("float %s(float x) { \n" " x = (34.0 * x + 1.0) * x; \n" " return fract(x * 1.0/289.0) * 289.0; \n" // (almost) mod 289 "} \n" "float %s(inout float state) { \n" " state = %s(state); \n" " return fract(state * 1.0/41.0); \n" "}\n", permute, randfun, permute); const char *seed = "0.0"; if (temporal) { float seedval = modff(M_PI * sh->index, &(float){0}); seed = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("seed"), .data = &seedval, .dynamic = true, }); } ident_t state = sh_fresh(sh, "prng"); GLSL("vec3 %s_m = vec3(gl_FragCoord.xy, %s) + vec3(1.0); \n" "float %s = %s(%s(%s(%s_m.x) + %s_m.y) + %s_m.z); \n", state, seed, state, permute, permute, permute, state, state, state); if (p_state) *p_state = state; ident_t res = sh_fresh(sh, "RAND"); GLSLH("#define %s (%s(%s))\n", res, randfun, state); return res; } // Defines a LUT position helper macro. This translates from an absolute texel // scale (0.0 - 1.0) to the texture coordinate scale for the corresponding // sample in a texture of dimension `lut_size`. static ident_t sh_lut_pos(struct pl_shader *sh, int lut_size) { ident_t name = sh_fresh(sh, "LUT_POS"); GLSLH("#define %s(x) mix(%f, %f, (x)) \n", name, 0.5 / lut_size, 1.0 - 0.5 / lut_size); return name; } struct sh_lut_obj { enum sh_lut_method method; int width, height, depth, comps; union { const struct pl_tex *tex; struct bstr str; float *data; } weights; }; static void sh_lut_uninit(const struct pl_gpu *gpu, void *ptr) { struct sh_lut_obj *lut = ptr; switch (lut->method) { case SH_LUT_TEXTURE: case SH_LUT_LINEAR: pl_tex_destroy(gpu, &lut->weights.tex); break; case SH_LUT_UNIFORM: talloc_free(lut->weights.data); break; case SH_LUT_LITERAL: talloc_free(lut->weights.str.start); break; default: break; } *lut = (struct sh_lut_obj) {0}; } // Maximum number of floats to embed as a literal array (when using SH_LUT_AUTO) #define SH_LUT_MAX_LITERAL 256 ident_t sh_lut(struct pl_shader *sh, struct pl_shader_obj **obj, enum sh_lut_method method, int width, int height, int depth, int comps, bool update, void *priv, void (*fill)(void *priv, float *data, int w, int h, int d)) { const struct pl_gpu *gpu = sh->gpu; float *tmp = NULL; ident_t ret = NULL; pl_assert(width > 0 && height >= 0 && depth >= 0); int sizes[] = { width, height, depth }; int size = width * PL_DEF(height, 1) * PL_DEF(depth, 1); int dims = depth ? 3 : height ? 2 : 1; int texdim = 0; int max_tex_dim[] = { gpu ? gpu->limits.max_tex_1d_dim : 0, gpu ? gpu->limits.max_tex_2d_dim : 0, gpu ? gpu->limits.max_tex_3d_dim : 0, }; for (int d = dims; d <= PL_ARRAY_SIZE(max_tex_dim); d++) { if (size <= max_tex_dim[d - 1]) { texdim = d; break; } } struct sh_lut_obj *lut = SH_OBJ(sh, obj, PL_SHADER_OBJ_LUT, struct sh_lut_obj, sh_lut_uninit); if (!lut) { PL_ERR(sh, "Failed initializing LUT object!"); goto error; } if (!gpu && method == SH_LUT_LINEAR) { PL_ERR(sh, "Linear LUTs require the use of a GPU!"); goto error; } if (!gpu) { PL_TRACE(sh, "No GPU available, falling back to literal LUT embedding"); method = SH_LUT_LITERAL; } // Pick the best method if (!method && size <= SH_LUT_MAX_LITERAL) method = SH_LUT_LITERAL; if (!method && texdim) method = SH_LUT_TEXTURE; if (!method && gpu && gpu->caps & PL_GPU_CAP_INPUT_VARIABLES) method = SH_LUT_UNIFORM; // No other method found if (!method) { PL_TRACE(sh, "No other LUT method works, falling back to literal " "embedding.. 
this is most likely a slow path!"); method = SH_LUT_LITERAL; } // Forcibly reinitialize the existing LUT if needed if (method != lut->method || width != lut->width || height != lut->height || depth != lut->depth || comps != lut->comps) { PL_DEBUG(sh, "LUT method or size changed, reinitializing.."); update = true; } if (update) { sh_lut_uninit(gpu, lut); tmp = talloc_zero_size(NULL, size * comps * sizeof(float)); fill(priv, tmp, width, height, depth); switch (method) { case SH_LUT_TEXTURE: case SH_LUT_LINEAR: { if (!texdim) { PL_ERR(sh, "Texture LUT exceeds texture dimensions!"); goto error; } enum pl_fmt_caps caps = PL_FMT_CAP_SAMPLEABLE; enum pl_tex_sample_mode mode = PL_TEX_SAMPLE_NEAREST; if (method == SH_LUT_LINEAR) { caps |= PL_FMT_CAP_LINEAR; mode = PL_TEX_SAMPLE_LINEAR; } const struct pl_fmt *fmt; fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, comps, 16, 32, caps); if (!fmt) { PL_ERR(sh, "Found no compatible texture format for LUT!"); goto error; } pl_assert(!lut->weights.tex); lut->weights.tex = pl_tex_create(gpu, &(struct pl_tex_params) { .w = width, .h = PL_DEF(height, texdim >= 2 ? 1 : 0), .d = PL_DEF(depth, texdim >= 3 ? 1 : 0), .format = fmt, .sampleable = true, .sample_mode = mode, .address_mode = PL_TEX_ADDRESS_CLAMP, .initial_data = tmp, }); if (!lut->weights.tex) { PL_ERR(sh, "Failed creating LUT texture!"); goto error; } break; } case SH_LUT_UNIFORM: pl_assert(!lut->weights.data); lut->weights.data = tmp; // re-use `tmp` tmp = NULL; break; case SH_LUT_LITERAL: { pl_assert(!lut->weights.str.len); for (int i = 0; i < size * comps; i += comps) { if (i > 0) bstr_xappend_asprintf_c(lut, &lut->weights.str, ","); if (comps > 1) bstr_xappend_asprintf_c(lut, &lut->weights.str, "vec%d(", comps); for (int c = 0; c < comps; c++) { bstr_xappend_asprintf_c(lut, &lut->weights.str, "%s%f", c > 0 ? "," : "", tmp[i+c]); } if (comps > 1) bstr_xappend_asprintf_c(lut, &lut->weights.str, ")"); } break; } case SH_LUT_AUTO: abort(); } lut->method = method; lut->width = width; lut->height = height; lut->depth = depth; lut->comps = comps; } // Done updating, generate the GLSL ident_t name = sh_fresh(sh, "lut"); ident_t arr_name = NULL; static const char * const types[] = {"float", "vec2", "vec3", "vec4"}; static const char * const swizzles[] = {"x", "xy", "xyz", "xyzw"}; switch (method) { case SH_LUT_TEXTURE: case SH_LUT_LINEAR: { ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "weights", .type = PL_DESC_SAMPLED_TEX, }, .object = lut->weights.tex, }); ident_t pos_macros[PL_ARRAY_SIZE(sizes)] = {0}; for (int i = 0; i < dims; i++) pos_macros[i] = sh_lut_pos(sh, sizes[i]); GLSLH("#define %s(pos) (texture(%s, %s(\\\n", name, tex, types[texdim - 1]); for (int i = 0; i < texdim; i++) { char sep = i == 0 ? 
' ' : ','; if (pos_macros[i]) { GLSLH(" %c%s((pos).%c)\\\n", sep, pos_macros[i], "xyzw"[i]); } else { GLSLH(" %c%f\\\n", sep, 0.5); } } GLSLH(" )).%s)\n", swizzles[comps - 1]); ret = name; break; } case SH_LUT_UNIFORM: arr_name = sh_var(sh, (struct pl_shader_var) { .var = { .name = "weights", .type = PL_VAR_FLOAT, .dim_v = comps, .dim_m = 1, .dim_a = size, }, .data = lut->weights.data, }); break; case SH_LUT_LITERAL: arr_name = sh_fresh(sh, "weights"); GLSLH("const %s %s[%d] = float[](\n ", types[comps - 1], arr_name, size); bstr_xappend(sh, &sh->buffers[SH_BUF_HEADER], lut->weights.str); GLSLH(");\n"); break; default: abort(); } if (arr_name) { GLSLH("#define %s(pos) (%s[int(%d * (pos).x)\\\n", name, arr_name, width); int shift = width; for (int i = 1; i < dims; i++) { GLSLH(" + %d * int(%d * (pos)[%d])\\\n", shift, sizes[i], i); shift *= sizes[i]; } GLSLH(" ])\n"); ret = name; } // fall through error: talloc_free(tmp); return ret; } libplacebo-0.4.0/src/shaders.h000066400000000000000000000156331324021332500162030ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include "bstr/bstr.h" #include "common.h" #include "context.h" #include "gpu.h" // This represents an identifier (e.g. name of function, uniform etc.) for // a shader resource. The generated identifiers are immutable, but only live // until pl_shader_reset - so make copies when passing to external stuff. typedef const char * ident_t; enum pl_shader_buf { SH_BUF_PRELUDE, // extra #defines etc. SH_BUF_HEADER, // previous passes, helper function definitions, etc. SH_BUF_BODY, // partial contents of the "current" function SH_BUF_COUNT, }; struct pl_shader { // Read-only fields struct pl_context *ctx; const struct pl_gpu *gpu; // Internal state struct ta_ref *tmp; bool mutable; int output_w; int output_h; struct pl_shader_res res; // for accumulating vertex_attribs etc. struct bstr buffers[SH_BUF_COUNT]; bool is_compute; bool flexible_work_groups; uint8_t ident; uint8_t index; int fresh; }; // Attempt enabling compute shaders for this pass, if possible bool sh_try_compute(struct pl_shader *sh, int bw, int bh, bool flex, size_t mem); // Attempt merging a secondary shader into the current shader. Returns NULL if // merging fails (e.g. incompatible signatures); otherwise returns an identifier // corresponding to the generated subpass function. ident_t sh_subpass(struct pl_shader *sh, const struct pl_shader *sub); // Helpers for adding new variables/descriptors/etc. with fresh, unique // identifier names. These will never conflcit with other identifiers, even // if the shaders are merged together. ident_t sh_fresh(struct pl_shader *sh, const char *name); // Add a new shader var and return its identifier ident_t sh_var(struct pl_shader *sh, struct pl_shader_var sv); // Add a new shader desc and return its identifier. 
This function takes care of // setting the binding to a fresh bind point according to the namespace // requirements, so the caller may leave it blank. ident_t sh_desc(struct pl_shader *sh, struct pl_shader_desc sd); // Add a new vec2 vertex attribute from a pl_rect2df, or returns NULL on failure. ident_t sh_attr_vec2(struct pl_shader *sh, const char *name, const struct pl_rect2df *rc); // Bind a texture under a given transformation and make its attributes // available as well. If an output pointer for one of the attributes is left // as NULL, that attribute will not be added. Returns NULL on failure. `rect` // is optional, and defaults to the full texture if left as NULL. // // Note that for e.g. compute shaders, the vec2 out_pos might be a macro that // expands to an expensive computation, and should be cached by the user. ident_t sh_bind(struct pl_shader *sh, const struct pl_tex *tex, const char *name, const struct pl_rect2df *rect, ident_t *out_pos, ident_t *out_size, ident_t *out_pt); // Underlying function for appending text to a shader void pl_shader_append(struct pl_shader *sh, enum pl_shader_buf buf, const char *fmt, ...) PRINTF_ATTRIBUTE(3, 4); #define GLSLP(...) pl_shader_append(sh, SH_BUF_PRELUDE, __VA_ARGS__) #define GLSLH(...) pl_shader_append(sh, SH_BUF_HEADER, __VA_ARGS__) #define GLSL(...) pl_shader_append(sh, SH_BUF_BODY, __VA_ARGS__) // Requires that the share is mutable, has an output signature compatible // with the given input signature, as well as an output size compatible with // the given size requirements. Errors and returns false otherwise. bool sh_require(struct pl_shader *sh, enum pl_shader_sig insig, int w, int h); // Shader resources enum pl_shader_obj_type { PL_SHADER_OBJ_INVALID = 0, PL_SHADER_OBJ_PEAK_DETECT, PL_SHADER_OBJ_SAMPLER, PL_SHADER_OBJ_SAMPLER_SEP, PL_SHADER_OBJ_DITHER, PL_SHADER_OBJ_LUT, }; struct pl_shader_obj { enum pl_shader_obj_type type; const struct pl_gpu *gpu; void (*uninit)(const struct pl_gpu *gpu, void *priv); void *priv; }; // Returns (*ptr)->priv, or NULL on failure void *sh_require_obj(struct pl_shader *sh, struct pl_shader_obj **ptr, enum pl_shader_obj_type type, size_t priv_size, void (*uninit)(const struct pl_gpu *gpu, void *priv)); #define SH_OBJ(sh, ptr, type, t, uninit) \ ((t*) sh_require_obj(sh, ptr, type, sizeof(t), uninit)) // Initializes a PRNG. The resulting string will directly evaluate to a // pseudorandom, uniformly distributed float from [0.0,1.0]. Since this // algorithm works by mutating a state variable, if the user wants to use the // resulting PRNG inside a subfunction, they must add an extra `inout float %s` // with the name of `state` to the signature. (Optional) // // If `temporal` is set, the PRNG will vary across frames. ident_t sh_prng(struct pl_shader *sh, bool temporal, ident_t *state); enum sh_lut_method { SH_LUT_AUTO = 0, // pick whatever makes the most sense SH_LUT_TEXTURE, // upload as texture SH_LUT_UNIFORM, // uniform array SH_LUT_LITERAL, // constant / literal array in shader source (fallback) // this is never picked by SH_DATA_AUTO SH_LUT_LINEAR, // upload as linearly-sampleable texture }; // Makes a table of float vecs values available as a shader variable, using an // a given method (falling back if needed). The resulting identifier can be // sampled directly as %s(pos), where pos is a vector with the right number of // dimensions. 
`pos` must be an integer vector within the bounds of the array, // unless the method is `SH_LUT_LINEAR` or `SH_LUT_TEXTURE` in which case it's // a float vector that gets interpolated and clamped as needed. Returns NULL on // error. // // This function also acts as `sh_require_obj`, and uses the `buf`, `tex` // and `text` fields of the resulting `obj`. (The other fields may be used by // the caller) // // The `fill` function will be called with a zero-initialized buffer whenever // the data needs to be computed, which happens whenever the size is changed, // the shader object is invalidated, or `update` is set to true. ident_t sh_lut(struct pl_shader *sh, struct pl_shader_obj **obj, enum sh_lut_method method, int width, int height, int depth, int components, bool update, void *priv, void (*fill)(void *priv, float *data, int w, int h, int d)); libplacebo-0.4.0/src/shaders/000077500000000000000000000000001324021332500160225ustar00rootroot00000000000000libplacebo-0.4.0/src/shaders/colorspace.c000066400000000000000000001231421324021332500203230ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "shaders.h" void pl_shader_decode_color(struct pl_shader *sh, struct pl_color_repr *repr, const struct pl_color_adjustment *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; GLSL("// pl_shader_decode_color \n" "{ \n"); // For the non-linear color systems we need some special input handling // to make sure we don't accidentally screw everything up because of the // alpha multiplication, which only commutes with linear operations. bool is_nonlinear = !pl_color_system_is_linear(repr->sys); if (is_nonlinear && repr->alpha == PL_ALPHA_PREMULTIPLIED) { GLSL("color.rgb /= vec3(max(color.a, 1e-6));\n"); repr->alpha = PL_ALPHA_INDEPENDENT; } // XYZ needs special handling due to the input gamma logic if (repr->sys == PL_COLOR_SYSTEM_XYZ) { float scale = pl_color_repr_normalize(repr); GLSL("color.rgb = pow(%f * color.rgb, vec3(2.6));\n", scale); } enum pl_color_system orig_sys = repr->sys; struct pl_transform3x3 tr = pl_color_repr_decode(repr, params); ident_t cmat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("cmat"), .data = PL_TRANSPOSE_3X3(tr.mat.m), }); ident_t cmat_c = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec3("cmat_m"), .data = tr.c, }); GLSL("color.rgb = %s * color.rgb + %s;\n", cmat, cmat_c); if (orig_sys == PL_COLOR_SYSTEM_BT_2020_C) { // Conversion for C'rcY'cC'bc via the BT.2020 CL system: // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 // = (B'-Y'c) / 1.5816 | C'bc > 0 // // C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0 // = (R'-Y'c) / 0.9936 | C'rc > 0 // // as per the BT.2020 specification, table 4. This is a non-linear // transformation because (constant) luminance receives non-equal // contributions from the three different channels. 
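        // Concretely, the GLSL below applies the inverse of the mapping
        // quoted above, reconstructing
        //   B' = C'bc * 1.9404 + Y'c  (C'bc <= 0),  B' = C'bc * 1.5816 + Y'c  (C'bc > 0)
        //   R' = C'rc * 1.7184 + Y'c  (C'rc <= 0),  R' = C'rc * 0.9936 + Y'c  (C'rc > 0)
        // via the mix()/lessThanEqual() selection, with Y'c taken from the
        // green channel (color.gg).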
GLSL("// constant luminance conversion \n" "color.br = color.br * mix(vec2(1.5816, 0.9936), \n" " vec2(1.9404, 1.7184), \n" " lessThanEqual(color.br, vec2(0))) \n" " + color.gg; \n" // Expand channels to camera-linear light. This shader currently just // assumes everything uses the BT.2020 12-bit gamma function, since the // difference between 10 and 12-bit is negligible for anything other // than 12-bit content. "vec3 lin = mix(color.rgb * vec3(1.0/4.5), \n" " pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n" " vec3(1.0/0.45)), \n" " lessThanEqual(vec3(0.08145), color.rgb)); \n" // Calculate the green channel from the expanded RYcB, and recompress to G' // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B "color.g = (lin.g - 0.2627*lin.r - 0.0593*lin.b)*1.0/0.6780; \n" "color.g = mix(color.g * 4.5, \n" " 1.0993 * pow(color.g, 0.45) - 0.0993, \n" " 0.0181 <= color.g); \n"); } if (repr->alpha == PL_ALPHA_INDEPENDENT) { GLSL("color.rgb *= vec3(color.a);\n"); repr->alpha = PL_ALPHA_PREMULTIPLIED; } GLSL("}"); } void pl_shader_encode_color(struct pl_shader *sh, const struct pl_color_repr *repr) { // Since this is a relatively rare operation, bypass it as much as possible bool skip = true; skip &= PL_DEF(repr->sys, PL_COLOR_SYSTEM_RGB) == PL_COLOR_SYSTEM_RGB; skip &= PL_DEF(repr->levels, PL_COLOR_LEVELS_PC) == PL_COLOR_LEVELS_PC; skip &= PL_DEF(repr->alpha, PL_ALPHA_PREMULTIPLIED) == PL_ALPHA_PREMULTIPLIED; skip &= repr->bits.sample_depth == repr->bits.color_depth; skip &= !repr->bits.bit_shift; if (skip) return; if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; GLSL("// pl_shader_encode_color \n" "{ \n"); if (!pl_color_system_is_linear(repr->sys)) { // FIXME: implement this case PL_ERR(sh, "Non-linear color encoding currently unimplemented!"); return; } struct pl_color_repr copy = *repr; struct pl_transform3x3 tr = pl_color_repr_decode(©, NULL); pl_transform3x3_invert(&tr); ident_t cmat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("cmat"), .data = PL_TRANSPOSE_3X3(tr.mat.m), }); ident_t cmat_c = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec3("cmat_m"), .data = tr.c, }); GLSL("color.rgb = %s * color.rgb + %s;\n", cmat, cmat_c); if (repr->alpha == PL_ALPHA_INDEPENDENT) GLSL("color.rgb /= vec3(max(color.a, 1e-6));\n"); GLSL("}\n"); } // Common constants for SMPTE ST.2084 (PQ) static const float PQ_M1 = 2610./4096 * 1./4, PQ_M2 = 2523./4096 * 128, PQ_C1 = 3424./4096, PQ_C2 = 2413./4096 * 32, PQ_C3 = 2392./4096 * 32; // Common constants for ARIB STD-B67 (HLG) static const float HLG_A = 0.17883277, HLG_B = 0.28466892, HLG_C = 0.55991073; // Common constants for Panasonic V-Log static const float VLOG_B = 0.00873, VLOG_C = 0.241514, VLOG_D = 0.598206; // Common constants for Sony S-Log static const float SLOG_A = 0.432699, SLOG_B = 0.037584, SLOG_C = 0.616596 + 0.03, SLOG_P = 3.538813, SLOG_Q = 0.030001, SLOG_K2 = 155.0 / 219.0; void pl_shader_linearize(struct pl_shader *sh, enum pl_color_transfer trc) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (trc == PL_COLOR_TRC_LINEAR) return; // Note that this clamp may technically violate the definition of // ITU-R BT.2100, which allows for sub-blacks and super-whites to be // displayed on the display where such would be possible. That said, the // problem is that not all gamma curves are well-defined on the values // outside this range, so we ignore it and just clamp anyway for sanity. 
GLSL("// pl_shader_linearize \n" "color.rgb = max(color.rgb, 0.0); \n"); switch (trc) { case PL_COLOR_TRC_SRGB: GLSL("color.rgb = mix(color.rgb * vec3(1.0/12.92), \n" " pow((color.rgb + vec3(0.055))/vec3(1.055), \n" " vec3(2.4)), \n" " lessThan(vec3(0.04045), color.rgb)); \n"); break; case PL_COLOR_TRC_BT_1886: GLSL("color.rgb = pow(color.rgb, vec3(2.4));\n"); break; case PL_COLOR_TRC_GAMMA18: GLSL("color.rgb = pow(color.rgb, vec3(1.8));\n"); break; case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_GAMMA22: GLSL("color.rgb = pow(color.rgb, vec3(2.2));\n"); break; case PL_COLOR_TRC_GAMMA28: GLSL("color.rgb = pow(color.rgb, vec3(2.8));\n"); break; case PL_COLOR_TRC_PRO_PHOTO: GLSL("color.rgb = mix(color.rgb * vec3(1.0/16.0), \n" " pow(color.rgb, vec3(1.8)), \n" " lessThan(vec3(0.03125), color.rgb)); \n"); break; case PL_COLOR_TRC_PQ: GLSL("color.rgb = pow(color.rgb, vec3(1.0/%f)); \n" "color.rgb = max(color.rgb - vec3(%f), 0.0) \n" " / (vec3(%f) - vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n" // PQ's output range is 0-10000, but we need it to be relative to // to PL_COLOR_REF_WHITE instead, so rescale "color.rgb *= vec3(%f);\n", PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000 / PL_COLOR_REF_WHITE); break; case PL_COLOR_TRC_HLG: GLSL("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb, \n" " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " + vec3(%f), \n" " lessThan(vec3(0.5), color.rgb)); \n", HLG_C, HLG_A, HLG_B); break; case PL_COLOR_TRC_V_LOG: GLSL("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n" " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f), \n" " lessThanEqual(vec3(0.181), color.rgb)); \n", VLOG_D, VLOG_C, VLOG_B); break; case PL_COLOR_TRC_S_LOG1: GLSL("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f); \n", SLOG_C, SLOG_A, SLOG_B); break; case PL_COLOR_TRC_S_LOG2: GLSL("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n" " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f)) * vec3(1.0/%f), \n" " lessThanEqual(vec3(%f), color.rgb)); \n", SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q); break; default: abort(); } } void pl_shader_delinearize(struct pl_shader *sh, enum pl_color_transfer trc) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (trc == PL_COLOR_TRC_LINEAR) return; GLSL("// pl_shader_delinearize \n" "color.rgb = max(color.rgb, 0.0); \n"); switch (trc) { case PL_COLOR_TRC_SRGB: GLSL("color.rgb = mix(color.rgb * vec3(12.92), \n" " vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) \n" " - vec3(0.055), \n" " lessThanEqual(vec3(0.0031308), color.rgb)); \n"); break; case PL_COLOR_TRC_BT_1886: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.4));\n"); break; case PL_COLOR_TRC_GAMMA18: GLSL("color.rgb = pow(color.rgb, vec3(1.0/1.8));\n"); break; case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_GAMMA22: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.2));\n"); break; case PL_COLOR_TRC_GAMMA28: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.8));\n"); break; case PL_COLOR_TRC_PRO_PHOTO: GLSL("color.rgb = mix(color.rgb * vec3(16.0), \n" " pow(color.rgb, vec3(1.0/1.8)), \n" " lessThanEqual(vec3(0.001953), color.rgb)); \n"); break; case PL_COLOR_TRC_PQ: GLSL("color.rgb *= vec3(1.0/%f); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n" "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n", 10000 / PL_COLOR_REF_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); break; case PL_COLOR_TRC_HLG: 
GLSL("color.rgb = mix(vec3(0.5) * sqrt(color.rgb), \n" " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f), \n" " lessThan(vec3(1.0), color.rgb)); \n", HLG_A, HLG_B, HLG_C); break; case PL_COLOR_TRC_V_LOG: GLSL("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n" " vec3(%f) * log(color.rgb + vec3(%f)) \n" " + vec3(%f), \n" " lessThanEqual(vec3(0.01), color.rgb)); \n", VLOG_C / M_LN10, VLOG_B, VLOG_D); break; case PL_COLOR_TRC_S_LOG1: GLSL("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n", SLOG_A / M_LN10, SLOG_B, SLOG_C); break; case PL_COLOR_TRC_S_LOG2: GLSL("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n" " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n" " + vec3(%f), \n" " lessThanEqual(vec3(0.0), color.rgb)); \n", SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C); break; default: abort(); } } const struct pl_sigmoid_params pl_sigmoid_default_params = { .center = 0.75, .slope = 6.50, }; void pl_shader_sigmoidize(struct pl_shader *sh, const struct pl_sigmoid_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; params = PL_DEF(params, &pl_sigmoid_default_params); float center = PL_DEF(params->center, 0.75); float slope = PL_DEF(params->slope, 6.5); // This function needs to go through (0,0) and (1,1), so we compute the // values at 1 and 0, and then scale/shift them, respectively. float offset = 1.0 / (1 + expf(slope * center)); float scale = 1.0 / (1 + expf(slope * (center - 1))) - offset; GLSL("// pl_shader_sigmoidize \n" "color = clamp(color, 0.0, 1.0); \n" "color = vec4(%f) - log(vec4(1.0) / (color * vec4(%f) + vec4(%f)) \n" " - vec4(1.0)) * vec4(%f); \n", center, scale, offset, 1.0 / slope); } void pl_shader_unsigmoidize(struct pl_shader *sh, const struct pl_sigmoid_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; // See: pl_shader_sigmoidize params = PL_DEF(params, &pl_sigmoid_default_params); float center = PL_DEF(params->center, 0.75); float slope = PL_DEF(params->slope, 6.5); float offset = 1.0 / (1 + expf(slope * center)); float scale = 1.0 / (1 + expf(slope * (center - 1))) - offset; GLSL("// pl_shader_unsigmoidize \n" "color = clamp(color, 0.0, 1.0); \n" "color = vec4(%f) / (vec4(1.0) + exp(vec4(%f) * (vec4(%f) - color))) \n" " - vec4(%f); \n", 1.0 / scale, slope, center, offset / scale); } // Applies the OOTF / inverse OOTF static void pl_shader_ootf(struct pl_shader *sh, enum pl_color_light light, ident_t luma) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (!light || light == PL_COLOR_LIGHT_DISPLAY) return; GLSL("// pl_shader_ootf \n" "color.rgb = max(color.rgb, 0.0); \n"); switch (light) { case PL_COLOR_LIGHT_SCENE_HLG: // HLG OOTF from BT.2100, assuming a reference display with a // peak of 1000 cd/m² -> gamma = 1.2 GLSL("color.rgb *= vec3(%f * pow(dot(%s, color.rgb), 0.2));\n", (1000 / PL_COLOR_REF_WHITE) / pow(12, 1.2), luma); break; case PL_COLOR_LIGHT_SCENE_709_1886: // This OOTF is defined by encoding the result as 709 and then decoding // it as 1886; although this is called 709_1886 we actually use the // more precise (by one decimal) values from BT.2020 instead GLSL("color.rgb = mix(color.rgb * vec3(4.5), \n" " vec3(1.0993) * pow(color.rgb, vec3(0.45)) \n" " - vec3(0.0993), \n" " lessThan(vec3(0.0181), color.rgb)); \n" "color.rgb = pow(color.rgb, vec3(2.4)); \n"); break; case PL_COLOR_LIGHT_SCENE_1_2: GLSL("color.rgb = pow(color.rgb, vec3(1.2));\n"); break; default: abort(); } } static void pl_shader_inverse_ootf(struct pl_shader *sh, enum pl_color_light light, ident_t 
luma) { if (!light || light == PL_COLOR_LIGHT_DISPLAY) return; GLSL("// pl_shader_inverse_ootf \n" "color.rgb = max(color.rgb, 0.0); \n"); switch (light) { case PL_COLOR_LIGHT_SCENE_HLG: GLSL("color.rgb *= vec3(1.0/%f); \n" "color.rgb /= vec3(max(1e-6, pow(dot(%s, color.rgb), \n" " 0.2/1.2))); \n", (1000 / PL_COLOR_REF_WHITE) / pow(12, 1.2), luma); break; case PL_COLOR_LIGHT_SCENE_709_1886: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.4)); \n" "color.rgb = mix(color.rgb * vec3(1.0/4.5), \n" " pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), \n" " vec3(1/0.45)), \n" " lessThan(vec3(0.08145), color.rgb)); \n"); break; case PL_COLOR_LIGHT_SCENE_1_2: GLSL("color.rgb = pow(color.rgb, vec3(1.0/1.2));\n"); break; default: abort(); } } // Average light level for SDR signals. This is equal to a signal level of 0.5 // under a typical presentation gamma of about 2.0. static const float sdr_avg = 0.25; const struct pl_color_map_params pl_color_map_default_params = { .intent = PL_INTENT_RELATIVE_COLORIMETRIC, .tone_mapping_algo = PL_TONE_MAPPING_HABLE, .tone_mapping_desaturate = 0.5, .peak_detect_frames = 63, .scene_threshold = 0.2, }; struct sh_peak_obj { const struct pl_gpu *gpu; const struct pl_buf *buf; }; static void sh_peak_uninit(const struct pl_gpu *gpu, void *ptr) { struct sh_peak_obj *obj = ptr; pl_buf_destroy(obj->gpu, &obj->buf); *obj = (struct sh_peak_obj) {0}; } static void hdr_update_peak(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_color_map_params *params) { if (!state) return; int frames = PL_DEF(params->peak_detect_frames, 20); if (frames < 1 || frames > 1000) { PL_ERR(sh, "Parameter peak_detect_frames must be >= 1 and <= 1000 " "(was %d).", frames); return; } struct sh_peak_obj *obj; obj = SH_OBJ(sh, state, PL_SHADER_OBJ_PEAK_DETECT, struct sh_peak_obj, sh_peak_uninit); if (!obj) return; if (!sh_try_compute(sh, 8, 8, true, sizeof(uint32_t))) { PL_WARN(sh, "HDR peak detection requires compute shaders.. disabling"); return; } const struct pl_gpu *gpu = sh->gpu; obj->gpu = gpu; struct pl_var idx, num, ctr, max, avg; idx = pl_var_uint(sh_fresh(sh, "index")); num = pl_var_uint(sh_fresh(sh, "number")); ctr = pl_var_uint(sh_fresh(sh, "counter")); max = pl_var_uint(sh_fresh(sh, "frames_max")); avg = pl_var_uint(sh_fresh(sh, "frames_avg")); max.dim_a = avg.dim_a = frames + 1; struct pl_var max_total, avg_total; max_total = pl_var_uint(sh_fresh(sh, "max_total")); avg_total = pl_var_uint(sh_fresh(sh, "avg_total")); // Attempt packing the peak detection SSBO struct pl_desc ssbo = { .name = "PeakDetect", .type = PL_DESC_BUF_STORAGE, .access = PL_DESC_ACCESS_READWRITE, }; struct pl_var_layout idx_l, num_l, ctr_l, max_l, avg_l, max_tl, avg_tl; bool ok = true; ok &= pl_buf_desc_append(sh->tmp, gpu, &ssbo, &idx_l, idx); ok &= pl_buf_desc_append(sh->tmp, gpu, &ssbo, &num_l, num); ok &= pl_buf_desc_append(sh->tmp, gpu, &ssbo, &ctr_l, ctr); ok &= pl_buf_desc_append(sh->tmp, gpu, &ssbo, &max_l, max); ok &= pl_buf_desc_append(sh->tmp, gpu, &ssbo, &avg_l, avg); ok &= pl_buf_desc_append(sh->tmp, gpu, &ssbo, &max_tl, max_total); ok &= pl_buf_desc_append(sh->tmp, gpu, &ssbo, &avg_tl, avg_total); if (!ok) { PL_WARN(sh, "HDR peak detection exhausts device limits.. 
disabling"); talloc_free(ssbo.buffer_vars); return; } // Create the SSBO if necessary size_t size = pl_buf_desc_size(&ssbo); if (!obj->buf || obj->buf->params.size != size) { PL_TRACE(sh, "(Re)creating HDR peak detection SSBO"); void *data = talloc_zero_size(NULL, size); pl_buf_destroy(gpu, &obj->buf); obj->buf = pl_buf_create(gpu, &(struct pl_buf_params) { .type = PL_BUF_STORAGE, .size = pl_buf_desc_size(&ssbo), .initial_data = data, }); talloc_free(data); } if (!obj->buf) { PL_ERR(sh, "Failed creating peak detection SSBO!"); return; } // Attach the SSBO and perform the peak detection logic sh_desc(sh, (struct pl_shader_desc) { .desc = ssbo, .object = obj->buf, }); // For performance, we want to do as few atomic operations on global // memory as possible, so use an atomic in shmem for the work group. ident_t wg_sum = sh_fresh(sh, "wg_sum"); GLSLH("shared uint %s;\n", wg_sum); GLSL("%s = 0;\n", wg_sum); // Have each thread update the work group sum with the local value GLSL("barrier(); \n" "atomicAdd(%s, uint(sig * %f)); \n", wg_sum, PL_COLOR_REF_WHITE); // Have one thread per work group update the global atomics. We use the // work group average even for the global sum, to make the values slightly // more stable and smooth out tiny super-highlights. GLSL("memoryBarrierShared(); \n" "barrier(); \n" "if (gl_LocalInvocationIndex == 0) { \n" " uint wg_avg = %s / (gl_WorkGroupSize.x * gl_WorkGroupSize.y); \n" " atomicMax(%s[%s], wg_avg); \n" " atomicAdd(%s[%s], wg_avg); \n" "} \n", wg_sum, max.name, idx.name, avg.name, idx.name); // Update the sig_peak/sig_avg from the old SSBO state GLSL("if (%s > 0) { \n" " float peak = float(%s) / (%f * float(%s)); \n" " float avg = float(%s) / (%f * float(%s)); \n" " sig_peak = max(1.0, peak); \n" " sig_avg = max(%f, avg); \n" "} \n", num.name, max_total.name, PL_COLOR_REF_WHITE, num.name, avg_total.name, PL_COLOR_REF_WHITE, num.name, sdr_avg); // Finally, to update the global state, we increment a counter per dispatch GLSL("memoryBarrierBuffer(); \n" "barrier(); \n" "uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y; \n" "if (gl_LocalInvocationIndex == 0 && atomicAdd(%s, 1) == num_wg - 1) { \n" " %s = 0; \n" // Divide out the workgroup sum by the number of work groups " %s[%s] /= num_wg; \n", ctr.name, ctr.name, avg.name, idx.name); // Scene change detection if (params->scene_threshold > 0) { GLSL(" uint cur_max = %s[%s]; \n" " uint cur_avg = %s[%s]; \n" " int diff = int(%s * cur_avg) - int(%s); \n" " if (abs(diff) > %s * %d) { \n" " %s = 0; \n" " %s = %s = 0; \n" " for (uint i = 0; i < %d; i++) \n" " %s[i] = %s[i] = 0; \n" " %s[%s] = cur_max; \n" " %s[%s] = cur_avg; \n" " } \n", max.name, idx.name, avg.name, idx.name, num.name, avg_total.name, num.name, (int) (params->scene_threshold * PL_COLOR_REF_WHITE), num.name, max_total.name, avg_total.name, frames + 1, max.name, avg.name, max.name, idx.name, avg.name, idx.name); } // Add the current frame, then subtract and reset the next frame GLSL(" uint next = (%s + 1) %% %d; \n" " %s += %s[%s] - %s[next]; \n" " %s += %s[%s] - %s[next]; \n" " %s[next] = %s[next] = 0; \n", idx.name, frames + 1, max_total.name, max.name, idx.name, max.name, avg_total.name, avg.name, idx.name, avg.name, max.name, avg.name); // Update the index and count GLSL(" %s = next; \n" " %s = min(%s + 1, %d); \n" " memoryBarrierBuffer(); \n" "} \n", idx.name, num.name, num.name, frames); } static void pl_shader_tone_map(struct pl_shader *sh, struct pl_color_space src, struct pl_color_space dst, ident_t luma, struct pl_shader_obj 
**peak_detect_state, const struct pl_color_map_params *params) { GLSL("// pl_shader_tone_map\n"); // To prevent discoloration due to out-of-bounds clipping, we need to make // sure to reduce the value range as far as necessary to keep the entire // signal in range, so tone map based on the brightest component. GLSL("float sig = max(max(color.r, color.g), color.b); \n" "float sig_peak = %f; \n" "float sig_avg = %f; \n", src.sig_peak, src.sig_avg); // HDR peak detection is done before scaling based on the dst.sig_peak/avg // in order to make the detected values stable / averageable. hdr_update_peak(sh, peak_detect_state, params); // Rescale the variables in order to bring it into a representation where // 1.0 represents the dst_peak. This is because all of the tone mapping // algorithms are defined in such a way that they map to the range [0.0, 1.0]. if (dst.sig_peak > 1.0) { GLSL("sig *= 1.0/%f; \n" "sig_peak *= 1.0/%f; \n", dst.sig_peak, dst.sig_peak); } // Desaturate the color using a coefficient dependent on the signal level if (params->tone_mapping_desaturate > 0) { GLSL("float luma = dot(%s, color.rgb); \n" "float coeff = max(sig - 0.18, 1e-6) / max(sig, 1e-6); \n" "coeff = pow(coeff, %f); \n" "color.rgb = mix(color.rgb, vec3(luma), coeff); \n" "sig = mix(sig, luma, coeff); \n", luma, 10.0 / params->tone_mapping_desaturate); } // Store the original signal level for later re-use GLSL("float sig_orig = sig;\n"); // Scale the signal to compensate for differences in the average brightness GLSL("float slope = min(1.0, %f / sig_avg); \n" "sig *= slope; \n" "sig_peak *= slope; \n", dst.sig_avg); float param = params->tone_mapping_param; switch (params->tone_mapping_algo) { case PL_TONE_MAPPING_CLIP: GLSL("sig *= %f;\n", PL_DEF(param, 1.0)); break; case PL_TONE_MAPPING_MOBIUS: GLSL("const float j = %f; \n" // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0 // where M(x) = scale * (x+a)/(x+b) "float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak); \n" "float b = (j*j - 2.0*j*sig_peak + sig_peak) / \n" " max(1e-6, sig_peak - 1.0); \n" "float scale = (b*b + 2.0*b*j + j*j) / (b-a); \n" "sig = sig > j ? (scale * (sig + a) / (sig + b)) : sig; \n", PL_DEF(param, 0.3)); break; case PL_TONE_MAPPING_REINHARD: { float contrast = PL_DEF(param, 0.5), offset = (1.0 - contrast) / contrast; GLSL("sig = sig / (sig + %f); \n" "float scale = (sig_peak + %f) / sig_peak; \n" "sig *= scale; \n", offset, offset); break; } case PL_TONE_MAPPING_HABLE: { float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; ident_t hable = sh_fresh(sh, "hable"); GLSLH("float %s(float x) { \n" "return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f; \n" "} \n", hable, A, C*B, D*E, A, B, D*F, E/F); GLSL("sig = %s(sig) / %s(sig_peak);\n", hable, hable); break; } case PL_TONE_MAPPING_GAMMA: GLSL("const float cutoff = 0.05, gamma = 1.0/%f; \n" "float scale = pow(cutoff / sig_peak, gamma) / cutoff; \n" "sig = sig > cutoff ? pow(sig / sig_peak, gamma) : scale * sig; \n", PL_DEF(param, 1.8)); break; case PL_TONE_MAPPING_LINEAR: GLSL("sig *= %f / sig_peak;\n", PL_DEF(param, 1.0)); break; default: abort(); } // Clip the final signal to the output range and apply the difference // linearly to the RGB channels. 
(this prevents discoloration) GLSL("sig = min(sig, 1.0); \n" "color.rgb *= sig / sig_orig; \n"); } void pl_shader_color_map(struct pl_shader *sh, const struct pl_color_map_params *params, struct pl_color_space src, struct pl_color_space dst, struct pl_shader_obj **peak_detect_state, bool prelinearized) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; GLSL("// pl_shader_color_map\n"); GLSL("{\n"); params = PL_DEF(params, &pl_color_map_default_params); // Defaults the primaries/transfer to sensible values. This isn't strictly // necessary, but it avoids some redundant operations in the cases where // src and dst are equal but one is set and the other is unknown src.primaries = PL_DEF(src.primaries, PL_COLOR_PRIM_BT_709); src.transfer = PL_DEF(src.transfer, PL_COLOR_TRC_GAMMA22); // If the source light type is unknown, infer it from the transfer function. if (!src.light) { src.light = (src.transfer == PL_COLOR_TRC_HLG) ? PL_COLOR_LIGHT_SCENE_HLG : PL_COLOR_LIGHT_DISPLAY; } // To be as conservative as possible, color mapping is disabled by default // except for special cases which are considered to be "sufficiently // different" from the source space. For primaries, this means anything // wide gamut; and for transfers, this means anything radically different // from the typical SDR curves. if (!dst.primaries) { dst.primaries = src.primaries; if (pl_color_primaries_is_wide_gamut(dst.primaries)) dst.primaries = PL_COLOR_PRIM_BT_709; } if (!dst.transfer) { dst.transfer = src.transfer; if (pl_color_transfer_is_hdr(dst.transfer) || dst.transfer == PL_COLOR_TRC_LINEAR) dst.transfer = PL_COLOR_TRC_GAMMA22; } // 99 times out of 100, this is what we want dst.light = PL_DEF(dst.light, PL_COLOR_LIGHT_DISPLAY); // Compute the highest encodable level float src_range = pl_color_transfer_nominal_peak(src.transfer), dst_range = pl_color_transfer_nominal_peak(dst.transfer); // Default the src/dst peak information based on the encodable range. For // the source peak, this is the safest possible value (no clipping). For // the dest peak, this makes full use of the available dynamic range. src.sig_peak = PL_DEF(src.sig_peak, src_range); dst.sig_peak = PL_DEF(dst.sig_peak, dst_range); // Defaults the signal average based on the SDR signal average. // Note: For HDR, this assumes well-mastered HDR content. src.sig_avg = PL_DEF(src.sig_avg, sdr_avg); // Defaults the dest average based on the source average, unless the source // is HDR and the destination is not, in which case fall back to SDR avg. if (!dst.sig_avg) { bool src_hdr = pl_color_transfer_is_hdr(src.transfer); bool dst_hdr = pl_color_transfer_is_hdr(dst.transfer); dst.sig_avg = src_hdr && !dst_hdr ? 
sdr_avg : src.sig_avg; } // All operations from here on require linear light as a starting point, // so we linearize even if src.gamma == dst.gamma when one of the other // operations needs it bool need_linear = src.transfer != dst.transfer || src.primaries != dst.primaries || src_range != dst_range || src.sig_peak > dst.sig_peak || src.sig_avg != dst.sig_avg || src.light != dst.light; // Various operations need access to the src_luma and dst_luma respectively, // so just always make them available if we're doing anything at all ident_t src_luma = NULL, dst_luma = NULL; if (need_linear) { struct pl_matrix3x3 rgb2xyz; rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(src.primaries)); src_luma = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec3("src_luma"), .data = rgb2xyz.m[1], // RGB->Y vector }); rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(dst.primaries)); dst_luma = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec3("dst_luma"), .data = rgb2xyz.m[1], // RGB->Y vector }); } bool is_linear = prelinearized; if (need_linear && !is_linear) { pl_shader_linearize(sh, src.transfer); is_linear = true; } if (src.light != dst.light) pl_shader_ootf(sh, src.light, src_luma); // Adapt to the right colorspace (primaries) if necessary if (src.primaries != dst.primaries) { const struct pl_raw_primaries *csp_src, *csp_dst; csp_src = pl_raw_primaries_get(src.primaries), csp_dst = pl_raw_primaries_get(dst.primaries); struct pl_matrix3x3 cms_mat; cms_mat = pl_get_color_mapping_matrix(csp_src, csp_dst, params->intent); GLSL("color.rgb = %s * color.rgb;\n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("cms_matrix"), .data = PL_TRANSPOSE_3X3(cms_mat.m), })); // Since this can reduce the gamut, figure out by how much for (int c = 0; c < 3; c++) src.sig_peak = fmaxf(src.sig_peak, cms_mat.m[c][c]); } // Tone map to rescale the signal average/peak if needed if (src.sig_peak > dst.sig_peak) pl_shader_tone_map(sh, src, dst, dst_luma, peak_detect_state, params); // Warn for remaining out-of-gamut colors if enabled if (params->gamut_warning) { GLSL("if (any(greaterThan(color.rgb, vec3(1.01))) ||\n" " any(lessThan(color.rgb, vec3(-0.01))))\n" " color.rgb = vec3(1.0) - color.rgb; // invert\n"); } if (src.light != dst.light) pl_shader_inverse_ootf(sh, dst.light, dst_luma); if (is_linear) pl_shader_delinearize(sh, dst.transfer); GLSL("}\n"); } struct sh_dither_obj { enum pl_dither_method method; struct pl_shader_obj *lut; }; static void sh_dither_uninit(const struct pl_gpu *gpu, void *ptr) { struct sh_dither_obj *obj = ptr; pl_shader_obj_destroy(&obj->lut); *obj = (struct sh_dither_obj) {0}; } static void fill_dither_matrix(void *priv, float *data, int w, int h, int d) { pl_assert(w > 0 && h > 0 && d == 0); const struct sh_dither_obj *obj = priv; switch (obj->method) { case PL_DITHER_ORDERED_LUT: pl_assert(w == h); pl_generate_bayer_matrix(data, w); break; case PL_DITHER_BLUE_NOISE: pl_generate_blue_noise(data, w); break; default: abort(); } } static bool dither_method_is_lut(enum pl_dither_method method) { switch (method) { case PL_DITHER_BLUE_NOISE: case PL_DITHER_ORDERED_LUT: return true; case PL_DITHER_ORDERED_FIXED: case PL_DITHER_WHITE_NOISE: return false; default: abort(); } } void pl_shader_dither(struct pl_shader *sh, int new_depth, struct pl_shader_obj **dither_state, const struct pl_dither_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (new_depth <= 0 || new_depth > 256) { PL_WARN(sh, "Invalid dither depth: %d.. 
ignoring", new_depth); return; } GLSL("// pl_shader_dither \n" "{ \n" "float bias; \n"); params = PL_DEF(params, &pl_dither_default_params); if (params->lut_size < 0 || params->lut_size > 8) { PL_ERR(sh, "Invalid `lut_size` specified: %d", params->lut_size); return; } enum pl_dither_method method = params->method; ident_t lut = NULL; int lut_size = 0; if (dither_method_is_lut(method)) { if (!dither_state) { PL_TRACE(sh, "LUT-based dither method specified but no dither state " "object given, falling back to non-LUT based methods."); goto fallback; } struct sh_dither_obj *obj; obj = SH_OBJ(sh, dither_state, PL_SHADER_OBJ_DITHER, struct sh_dither_obj, sh_dither_uninit); if (!obj) goto fallback; bool changed = obj->method != method; obj->method = method; lut_size = 1 << PL_DEF(params->lut_size, 6); lut = sh_lut(sh, &obj->lut, SH_LUT_AUTO, lut_size, lut_size, 0, 1, changed, obj, fill_dither_matrix); if (!lut) goto fallback; } goto done; fallback: if (sh->gpu && sh->gpu->glsl.version >= 130) { method = PL_DITHER_ORDERED_FIXED; } else { method = PL_DITHER_WHITE_NOISE; } // fall through done: ; int size = 0; if (lut) { size = lut_size; } else if (method == PL_DITHER_ORDERED_FIXED) { size = 16; // hard-coded size } if (size) { // Transform the screen position to the cyclic range [0,1) GLSL("vec2 pos = fract(gl_FragCoord.xy * 1.0/%d.0);\n", size); if (params->temporal) { int phase = sh->index % 8; float r = phase * (M_PI / 2); // rotate float m = phase < 4 ? 1 : -1; // mirror float mat[2][2] = { {cos(r), -sin(r) }, {sin(r) * m, cos(r) * m}, }; ident_t rot = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat2("dither_rot"), .data = &mat[0][0], .dynamic = true, }); GLSL("pos = fract(%s * pos + vec2(1.0));\n", rot); } } switch (method) { case PL_DITHER_WHITE_NOISE: { ident_t prng = sh_prng(sh, params->temporal, NULL); GLSL("bias = %s;\n", prng); break; } case PL_DITHER_ORDERED_FIXED: // Bitwise ordered dither using only 32-bit uints GLSL("uvec2 xy = uvec2(pos * 16.0) %% 16u; \n" // Bitwise merge (morton number) "xy.x = xy.x ^ xy.y; \n" "xy = (xy | xy << 2) & uvec2(0x33333333); \n" "xy = (xy | xy << 1) & uvec2(0x55555555); \n" // Bitwise inversion "uint b = xy.x + (xy.y << 1); \n" "b = (b * 0x0802u & 0x22110u) | \n" " (b * 0x8020u & 0x88440u); \n" "b = 0x10101u * b; \n" "b = (b >> 16) & 0xFFu; \n" // Generate bias value "bias = float(b) * 1.0/256.0; \n"); break; default: // LUT-based methods pl_assert(lut); GLSL("bias = %s(pos);\n", lut); break; } uint64_t scale = (1LLU << new_depth) - 1; GLSL("color = vec4(%f) * color + vec4(bias); \n" "color = floor(color) * vec4(%f); \n" "} \n", (float) scale, 1.0 / scale); } const struct pl_dither_params pl_dither_default_params = { .method = PL_DITHER_BLUE_NOISE, .temporal = false, // commonly flickers on LCDs }; libplacebo-0.4.0/src/shaders/sampling.c000066400000000000000000000604121324021332500200030ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. 
If not, see . */ #include #include "shaders.h" const struct pl_deband_params pl_deband_default_params = { .iterations = 1, .threshold = 4.0, .radius = 16.0, .grain = 6.0, }; void pl_shader_deband(struct pl_shader *sh, const struct pl_tex *pl_tex, const struct pl_deband_params *params) { if (pl_tex->params.sample_mode != PL_TEX_SAMPLE_LINEAR) { PL_ERR(sh, "Debanding requires sample_mode = PL_TEX_SAMPLE_LINEAR!"); return; } if (!sh_require(sh, PL_SHADER_SIG_NONE, 0, 0)) return; GLSL("vec4 color;\n"); GLSL("// pl_shader_deband\n"); GLSL("{\n"); params = PL_DEF(params, &pl_deband_default_params); ident_t tex, pos, pt; tex = sh_bind(sh, pl_tex, "deband", NULL, &pos, NULL, &pt); if (!tex) return; ident_t prng, state; prng = sh_prng(sh, true, &state); GLSL("vec2 pos = %s; \n" "vec4 avg, diff; \n" "color = texture(%s, pos); \n", pos, tex); // Helper function: Compute a stochastic approximation of the avg color // around a pixel, given a specified radius ident_t average = sh_fresh(sh, "average"); GLSLH("vec4 %s(vec2 pos, float range, inout float %s) { \n" // Compute a random angle and distance " float dist = %s * range; \n" " float dir = %s * %f; \n" " vec2 o = dist * vec2(cos(dir), sin(dir)); \n" // Sample at quarter-turn intervals around the source pixel " vec4 sum = vec4(0.0); \n" " sum += texture(%s, pos + %s * vec2( o.x, o.y)); \n" " sum += texture(%s, pos + %s * vec2(-o.x, o.y)); \n" " sum += texture(%s, pos + %s * vec2(-o.x, -o.y)); \n" " sum += texture(%s, pos + %s * vec2( o.x, -o.y)); \n" // Return the (normalized) average " return 0.25 * sum; \n" "}\n", average, state, prng, prng, M_PI * 2, tex, pt, tex, pt, tex, pt, tex, pt); // For each iteration, compute the average at a given distance and // pick it instead of the color if the difference is below the threshold. 
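    // For example, with pl_deband_default_params (threshold 4.0, radius 16.0,
    // iterations 1), the single default iteration averages four samples taken
    // at a random angle and a random distance of up to 16 texels, and replaces
    // a channel by that average only when it differs from it by no more than
    // 4.0/1000 = 0.004; every additional iteration widens the search radius
    // (i * radius) while tightening the acceptance threshold (threshold / (1000 * i)).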
for (int i = 1; i <= params->iterations; i++) { GLSL("avg = %s(pos, %f, %s); \n" "diff = abs(color - avg); \n" "color = mix(avg, color, greaterThan(diff, vec4(%f))); \n", average, i * params->radius, state, params->threshold / (1000 * i)); } // Add some random noise to smooth out residual differences if (params->grain > 0) { GLSL("vec3 noise = vec3(%s, %s, %s); \n" "color.rgb += %f * (noise - vec3(0.5)); \n", prng, prng, prng, params->grain / 1000.0); } GLSL("}\n"); } // Helper function to compute the src/dst sizes and upscaling ratios static bool setup_src(struct pl_shader *sh, const struct pl_sample_src *src, ident_t *src_tex, ident_t *pos, ident_t *size, ident_t *pt, float *ratio_x, float *ratio_y, int *components, bool resizeable) { float src_w = pl_rect_w(src->rect); float src_h = pl_rect_h(src->rect); src_w = PL_DEF(src_w, src->tex->params.w); src_h = PL_DEF(src_h, src->tex->params.h); int out_w = PL_DEF(src->new_w, fabs(src_w)); int out_h = PL_DEF(src->new_h, fabs(src_h)); if (ratio_x) *ratio_x = out_w / fabs(src_w); if (ratio_y) *ratio_y = out_h / fabs(src_h); if (components) { const struct pl_fmt *fmt = src->tex->params.format; *components = PL_DEF(src->components, fmt->num_components); } if (resizeable) out_w = out_h = 0; if (!sh_require(sh, PL_SHADER_SIG_NONE, out_w, out_h)) return false; struct pl_rect2df rect = { .x0 = src->rect.x0, .y0 = src->rect.y0, .x1 = src->rect.x0 + src_w, .y1 = src->rect.y0 + src_h, }; *src_tex = sh_bind(sh, src->tex, "src_tex", &rect, pos, size, pt); return true; } bool pl_shader_sample_direct(struct pl_shader *sh, const struct pl_sample_src *src) { ident_t tex, pos; if (!setup_src(sh, src, &tex, &pos, NULL, NULL, NULL, NULL, NULL, true)) return false; GLSL("// pl_shader_sample_direct \n" "vec4 color = texture(%s, %s); \n", tex, pos); return true; } static void bicubic_calcweights(struct pl_shader *sh, const char *t, const char *s) { // Explanation of how bicubic scaling with only 4 texel fetches is done: // http://www.mate.tue.nl/mate/pdfs/10318.pdf // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' GLSL("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s \n" " + vec4(1, 0, -0.5, 0.5); \n" "%s = %s * %s + vec4(0.0, 0.0, -0.5, 0.5); \n" "%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666); \n" "%s.xy /= %s.zw; \n" "%s.xy += vec2(1.0 + %s, 1.0 - %s); \n", t, s, t, t, s, t, t, s, t, t, t, s, s); } bool pl_shader_sample_bicubic(struct pl_shader *sh, const struct pl_sample_src *src) { if (src->tex->params.sample_mode != PL_TEX_SAMPLE_LINEAR) { PL_ERR(sh, "Trying to use fast bicubic sampling from a texture without " "PL_TEX_SAMPLE_LINEAR"); return false; } ident_t tex, pos, size, pt; float rx, ry; if (!setup_src(sh, src, &tex, &pos, &size, &pt, &rx, &ry, NULL, true)) return false; if (rx < 1 || ry < 1) { PL_TRACE(sh, "Using fast bicubic sampling when downscaling. 
This " "will most likely result in nasty aliasing!"); } GLSL("// pl_shader_sample_bicubic \n" "vec4 color = vec4(0.0); \n" "{ \n" "vec2 pos = %s; \n" "vec2 pt = %s; \n" "vec2 size = %s; \n" "vec2 fcoord = fract(pos * size + vec2(0.5)); \n", pos, pt, size); bicubic_calcweights(sh, "parmx", "fcoord.x"); bicubic_calcweights(sh, "parmy", "fcoord.y"); GLSL("vec4 cdelta; \n" "cdelta.xz = parmx.rg * vec2(-pt.x, pt.x); \n" "cdelta.yw = parmy.rg * vec2(-pt.y, pt.y); \n" // first y-interpolation "vec4 ar = texture(%s, pos + cdelta.xy); \n" "vec4 ag = texture(%s, pos + cdelta.xw); \n" "vec4 ab = mix(ag, ar, parmy.b); \n" // second y-interpolation "vec4 br = texture(%s, pos + cdelta.zy); \n" "vec4 bg = texture(%s, pos + cdelta.zw); \n" "vec4 aa = mix(bg, br, parmy.b); \n" // x-interpolation "color = mix(aa, ab, parmx.b); \n" "} \n", tex, tex, tex, tex); return true; } static bool filter_compat(const struct pl_filter *filter, float inv_scale, int lut_entries, float cutoff, const struct pl_filter_config *params) { if (!filter) return false; if (filter->params.lut_entries != lut_entries) return false; if (fabs(filter->params.filter_scale - inv_scale) > 1e-3) return false; if (filter->params.cutoff != cutoff) return false; return pl_filter_config_eq(&filter->params.config, params); } // Subroutine for computing and adding an individual texel contribution // If `in` is NULL, samples directly // If `in` is set, takes the pixel from inX[idx] where X is the component, // `in` is the given identifier, and `idx` must be defined by the caller static void polar_sample(struct pl_shader *sh, const struct pl_filter *filter, ident_t tex, ident_t lut, int x, int y, int comps, ident_t in) { // Since we can't know the subpixel position in advance, assume a // worst case scenario int yy = y > 0 ? y-1 : y; int xx = x > 0 ? 
x-1 : x; float dmax = sqrt(xx*xx + yy*yy); // Skip samples definitely outside the radius if (dmax >= filter->radius_cutoff) return; GLSL("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y); // Check for samples that might be skippable bool maybe_skippable = dmax >= filter->radius_cutoff - M_SQRT2; if (maybe_skippable) GLSL("if (d < %f) {\n", filter->radius_cutoff); // Get the weight for this pixel GLSL("w = %s(d * 1.0/%f); \n" "wsum += w; \n", lut, filter->radius); if (in) { for (int n = 0; n < comps; n++) GLSL("color[%d] += w * %s%d[idx];\n", n, in, n); } else { GLSL("in0 = texture(%s, base + pt * vec2(%d.0, %d.0)); \n" "color += vec4(w) * in0; \n", tex, x, y); } if (maybe_skippable) GLSL("}\n"); } struct sh_sampler_obj { const struct pl_filter *filter; struct pl_shader_obj *lut; }; static void sh_sampler_uninit(const struct pl_gpu *gpu, void *ptr) { struct sh_sampler_obj *obj = ptr; pl_shader_obj_destroy(&obj->lut); pl_filter_free(&obj->filter); *obj = (struct sh_sampler_obj) {0}; } static void fill_polar_lut(void *priv, float *data, int w, int h, int d) { const struct sh_sampler_obj *obj = priv; const struct pl_filter *filt = obj->filter; pl_assert(w == filt->params.lut_entries); memcpy(data, filt->weights, w * sizeof(float)); } bool pl_shader_sample_polar(struct pl_shader *sh, const struct pl_sample_src *src, const struct pl_sample_filter_params *params) { pl_assert(params); if (!params->filter.polar) { PL_ERR(sh, "Trying to use polar sampling with a non-polar filter?"); return false; } const struct pl_gpu *gpu = sh->gpu; const struct pl_tex *tex = src->tex; pl_assert(gpu && tex); bool has_compute = gpu->caps & PL_GPU_CAP_COMPUTE && !params->no_compute; bool flipped = src->rect.x0 > src->rect.x1 || src->rect.y0 > src->rect.y1; if (flipped && has_compute) { PL_WARN(sh, "Trying to use a flipped src.rect with polar sampling! " "This prevents the use of compute shaders, which is a " "potentially massive performance hit. If you're really sure you " "want this, set params.no_compute to suppress this warning."); has_compute = false; } int comps; float rx, ry; ident_t src_tex, pos, size, pt; if (!setup_src(sh, src, &src_tex, &pos, &size, &pt, &rx, &ry, &comps, false)) return false; struct sh_sampler_obj *obj; obj = SH_OBJ(sh, params->lut, PL_SHADER_OBJ_SAMPLER, struct sh_sampler_obj, sh_sampler_uninit); if (!obj) return false; float inv_scale = 1.0 / PL_MIN(rx, ry); inv_scale = PL_MAX(inv_scale, 1.0); if (params->no_widening) inv_scale = 1.0; int lut_entries = PL_DEF(params->lut_entries, 64); float cutoff = PL_DEF(params->cutoff, 0.001); bool update = !filter_compat(obj->filter, inv_scale, lut_entries, cutoff, ¶ms->filter); if (update) { pl_filter_free(&obj->filter); obj->filter = pl_filter_generate(sh->ctx, &(struct pl_filter_params) { .config = params->filter, .lut_entries = lut_entries, .filter_scale = inv_scale, .cutoff = cutoff, }); if (!obj->filter) { // This should never happen, but just in case .. 
PL_ERR(sh, "Failed initializing polar filter!"); return false; } } ident_t lut = sh_lut(sh, &obj->lut, SH_LUT_LINEAR, lut_entries, 0, 0, 1, update, obj, fill_polar_lut); if (!lut) { PL_ERR(sh, "Failed initializing polar LUT!"); return false; } GLSL("// pl_shader_sample_polar \n" "vec4 color = vec4(0.0); \n" "{ \n" "vec2 pos = %s, size = %s, pt = %s; \n" "vec2 fcoord = fract(pos * size - vec2(0.5)); \n" "vec2 base = pos - pt * fcoord; \n" "float w, d, wsum = 0.0; \n" "int idx; \n" "vec4 c; \n", pos, size, pt); int bound = ceil(obj->filter->radius_cutoff); int offset = bound - 1; // padding top/left int padding = offset + bound; // total padding // For performance we want to load at least as many pixels horizontally as // there are threads in a warp, as well as enough to take advantage of // shmem parallelism. However, on the other hand, to hide latency we want // to avoid making the kernel too large. A good size overall is 256 // threads, which allows at least 8 to run in parallel assuming good VGPR // distribution. A good trade-off for the horizontal row size is 32, which // is the warp size on nvidia. Going up to 64 (AMD's wavefront size) // is not worth it even on AMD hardware. const int bw = 32, bh = 256 / bw; // We need to sample everything from base_min to base_max, so make sure // we have enough room in shmem int iw = (int) ceil(bw / rx) + padding + 1, ih = (int) ceil(bh / ry) + padding + 1; ident_t in = NULL; int shmem_req = iw * ih * comps * sizeof(float); if (has_compute && sh_try_compute(sh, bw, bh, false, shmem_req)) { // Compute shader kernel GLSL("vec2 wpos = %s_map(gl_WorkGroupID * gl_WorkGroupSize); \n" "vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5)); \n" "ivec2 rel = ivec2(round((base - wbase) * size)); \n", pos); // Load all relevant texels into shmem GLSL("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) { \n" "for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) { \n" "c = texture(%s, wbase + pt * vec2(x - %d, y - %d)); \n", ih, bh, iw, bw, src_tex, offset, offset); in = sh_fresh(sh, "in"); for (int c = 0; c < comps; c++) { GLSLH("shared float %s%d[%d]; \n", in, c, ih * iw); GLSL("%s%d[%d * y + x] = c[%d]; \n", in, c, iw, c); } GLSL("}} \n" "groupMemoryBarrier(); \n" "barrier(); \n"); // Dispatch the actual samples for (int y = 1 - bound; y <= bound; y++) { for (int x = 1 - bound; x <= bound; x++) { GLSL("idx = %d * rel.y + rel.x + %d;\n", iw, iw * (y + offset) + x + offset); polar_sample(sh, obj->filter, src_tex, lut, x, y, comps, in); } } } else { // Fragment shader sampling for (int n = 0; n < comps; n++) GLSL("vec4 in%d;\n", n); // Iterate over the LUT space in groups of 4 texels at a time, and // decide for each texel group whether to use gathering or direct // sampling. for (int y = 1 - bound; y <= bound; y += 2) { for (int x = 1 - bound; x <= bound; x += 2) { // Using texture gathering is only more efficient than direct // sampling in the case where we expect to be able to use all // four gathered texels, without having to discard any. So // only do it if we suspsect it will be a win rather than a // loss. 
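                // Concretely: the 2x2 group handled below covers texels
                // (x..x+1, y..y+1), and the distance of its (x, y) corner is
                // used as a cheap proxy for whether the whole group lies
                // within radius_cutoff.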
bool use_gather = sqrt(x*x + y*y) < obj->filter->radius_cutoff; // Make sure all required features are supported use_gather &= gpu->glsl.version >= 400; use_gather &= gpu->limits.max_gather_offset != 0; use_gather &= PL_MAX(x, y) <= gpu->limits.max_gather_offset; use_gather &= PL_MIN(x, y) >= gpu->limits.min_gather_offset; if (!use_gather) { // Switch to direct sampling instead for (int yy = y; yy <= bound && yy <= y + 1; yy++) { for (int xx = x; xx <= bound && xx <= x + 1; xx++) { polar_sample(sh, obj->filter, src_tex, lut, xx, yy, comps, NULL); } } continue; // next group of 4 } // Gather the four surrounding texels simultaneously for (int n = 0; n < comps; n++) { GLSL("in%d = textureGatherOffset(%s, base, " "ivec2(%d, %d), %d);\n", n, src_tex, x, y, n); } // Mix in all of the points with their weights for (int p = 0; p < 4; p++) { // The four texels are gathered counterclockwise starting // from the bottom left static const int xo[4] = {0, 1, 1, 0}; static const int yo[4] = {1, 1, 0, 0}; if (x+xo[p] > bound || y+yo[p] > bound) continue; // next subpixel GLSL("idx = %d;\n", p); polar_sample(sh, obj->filter, src_tex, lut, x+xo[p], y+yo[p], comps, "in"); } } } } GLSL("color = color / vec4(wsum); \n" "} \n"); return true; } struct sh_sampler_sep_obj { struct pl_shader_obj *samplers[2]; }; static void sh_sampler_sep_uninit(const struct pl_gpu *gpu, void *ptr) { struct sh_sampler_sep_obj *obj = ptr; for (int i = 0; i < PL_ARRAY_SIZE(obj->samplers); i++) pl_shader_obj_destroy(&obj->samplers[i]); *obj = (struct sh_sampler_sep_obj) {0}; } static void fill_ortho_lut(void *priv, float *data, int w, int h, int d) { const struct sh_sampler_obj *obj = priv; const struct pl_filter *filt = obj->filter; pl_assert(w * h * 4 == filt->params.lut_entries * filt->row_stride); memcpy(data, filt->weights, w * h * 4 * sizeof(float)); } bool pl_shader_sample_ortho(struct pl_shader *sh, int pass, const struct pl_sample_src *src, const struct pl_sample_filter_params *params) { pl_assert(params); if (params->filter.polar) { PL_ERR(sh, "Trying to use separated sampling with a polar filter?"); return false; } const struct pl_gpu *gpu = sh->gpu; const struct pl_tex *tex = src->tex; pl_assert(gpu && tex); struct pl_sample_src srcfix = *src; switch (pass) { case PL_SEP_VERT: srcfix.rect.x0 = 0; srcfix.rect.x1 = srcfix.new_w = tex->params.w; break; case PL_SEP_HORIZ: srcfix.rect.y0 = 0; srcfix.rect.y1 = srcfix.new_h = tex->params.h; break; case PL_SEP_PASSES: default: abort(); } int comps; float ratio[2]; ident_t src_tex, pos, size, pt; if (!setup_src(sh, &srcfix, &src_tex, &pos, &size, &pt, &ratio[1], &ratio[0], &comps, false)) { return false; } // We can store a separate sampler object per dimension, so dispatch the // right one. This is needed for two reasons: // 1. Anamorphic content can have a different scaling ratio for each // dimension. In particular, you could be upscaling in one and // downscaling in the other. // 2. After fixing the source for `setup_src`, we lose information about // the scaling ratio of the other component. 
(Although this is only a // minor reason and could easily be changed with some boilerplate) struct sh_sampler_sep_obj *sepobj; sepobj = SH_OBJ(sh, params->lut, PL_SHADER_OBJ_SAMPLER_SEP, struct sh_sampler_sep_obj, sh_sampler_sep_uninit); if (!sepobj) return false; struct sh_sampler_obj *obj; obj = SH_OBJ(sh, &sepobj->samplers[pass], PL_SHADER_OBJ_SAMPLER, struct sh_sampler_obj, sh_sampler_uninit); if (!obj) return false; float inv_scale = 1.0 / ratio[pass]; inv_scale = PL_MAX(inv_scale, 1.0); if (params->no_widening) inv_scale = 1.0; int lut_entries = PL_DEF(params->lut_entries, 64); bool update = !filter_compat(obj->filter, inv_scale, lut_entries, 0.0, ¶ms->filter); if (update) { pl_filter_free(&obj->filter); obj->filter = pl_filter_generate(sh->ctx, &(struct pl_filter_params) { .config = params->filter, .lut_entries = lut_entries, .filter_scale = inv_scale, .max_row_size = gpu->limits.max_tex_2d_dim / 4, .row_stride_align = 4, }); if (!obj->filter) { // This should never happen, but just in case .. PL_ERR(sh, "Failed initializing separated filter!"); return false; } } int N = obj->filter->row_size; // number of samples to convolve int width = obj->filter->row_stride / 4; // width of the LUT texture ident_t lut = sh_lut(sh, &obj->lut, SH_LUT_LINEAR, width, lut_entries, 0, 4, update, obj, fill_ortho_lut); if (!lut) { PL_ERR(sh, "Failed initializing separated LUT!"); return false; } const float dir[PL_SEP_PASSES][2] = { [PL_SEP_HORIZ] = {1.0, 0.0}, [PL_SEP_VERT] = {0.0, 1.0}, }; GLSL("// pl_shader_sample_ortho \n" "vec4 color = vec4(0.0); \n" "{ \n" "vec2 pos = %s, size = %s, pt = %s; \n" "vec2 dir = vec2(%f, %f); \n" "pt *= dir; \n" "vec2 fcoord2 = fract(pos * size - vec2(0.5)); \n" "float fcoord = dot(fcoord2, dir); \n" "vec2 base = pos - fcoord * pt - pt * vec2(%d.0); \n" "float weight; \n" "vec4 ws, c; \n", pos, size, pt, dir[pass][0], dir[pass][1], N / 2 - 1); bool use_ar = params->antiring > 0; if (use_ar) { GLSL("vec4 hi = vec4(0.0); \n" "vec4 lo = vec4(1e9); \n"); } // Dispatch all of the samples GLSL("// scaler samples\n"); for (int n = 0; n < N; n++) { // Load the right weight for this instance. For every 4th weight, we // need to fetch another LUT entry. Otherwise, just use the previous if (n % 4 == 0) { float denom = PL_MAX(1, width - 1); // avoid division by zero GLSL("ws = %s(vec2(%f, fcoord));\n", lut, (n / 4) / denom); } GLSL("weight = ws[%d];\n", n % 4); // Load the input texel and add it to the running sum GLSL("c = texture(%s, base + pt * vec2(%d.0)); \n" "color += vec4(weight) * c; \n", src_tex, n); if (use_ar && (n == N / 2 - 1 || n == N / 2)) { GLSL("lo = min(lo, c); \n" "hi = max(hi, c); \n"); } } if (use_ar) { GLSL("color = mix(color, clamp(color, lo, hi), %f);\n", params->antiring); } GLSL("}\n"); return true; } libplacebo-0.4.0/src/spirv.c000066400000000000000000000033341324021332500157030ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "spirv.h" extern const struct spirv_compiler_fns spirv_shaderc; static const struct spirv_compiler_fns *compilers[] = { #if PL_HAVE_SHADERC &spirv_shaderc, #endif }; struct spirv_compiler *spirv_compiler_create(struct pl_context *ctx) { for (int i = 0; i < PL_ARRAY_SIZE(compilers); i++) { const struct spirv_compiler_fns *impl = compilers[i]; struct spirv_compiler *spirv = talloc_zero(NULL, struct spirv_compiler); spirv->ctx = ctx; spirv->impl = impl; strncpy(spirv->name, impl->name, sizeof(spirv->name)); pl_info(ctx, "Initializing SPIR-V compiler '%s'", impl->name); if (impl->init(spirv)) return spirv; talloc_free(spirv); } pl_fatal(ctx, "Failed initializing any SPIR-V compiler! Maybe " "libplacebo was built without support for libshaderc?"); return NULL; } void spirv_compiler_destroy(struct spirv_compiler **spirv) { if (!*spirv) return; (*spirv)->impl->uninit(*spirv); TA_FREEP(spirv); } libplacebo-0.4.0/src/spirv.h000066400000000000000000000036221324021332500157100ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include "context.h" enum glsl_shader_stage { GLSL_SHADER_VERTEX, GLSL_SHADER_FRAGMENT, GLSL_SHADER_COMPUTE, }; #define SPIRV_NAME_MAX_LEN 32 struct spirv_compiler { char name[SPIRV_NAME_MAX_LEN]; // for cache invalidation struct pl_context *ctx; const struct spirv_compiler_fns *impl; // implementation-specific fields void *priv; struct pl_glsl_desc glsl; // supported GLSL capabilities int compiler_version; // for cache invalidation, may be left as 0 }; struct spirv_compiler_fns { const char *name; // Compile GLSL to SPIR-V, under GL_KHR_vulkan_glsl semantics. bool (*compile_glsl)(struct spirv_compiler *spirv, void *tactx, enum glsl_shader_stage type, const char *glsl, struct bstr *out_spirv); // Only needs to initialize the implementation-specific fields bool (*init)(struct spirv_compiler *spirv); void (*uninit)(struct spirv_compiler *spirv); }; // Initialize a SPIR-V compiler instance, or returns NULL on failure. struct spirv_compiler *spirv_compiler_create(struct pl_context *ctx); void spirv_compiler_destroy(struct spirv_compiler **spirv); libplacebo-0.4.0/src/spirv_shaderc.c000066400000000000000000000125621324021332500173770ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #define _XOPEN_SOURCE 700 #include #include #ifdef __APPLE__ # include # include #endif #include #include "spirv.h" struct priv { shaderc_compiler_t compiler; shaderc_compile_options_t opts; locale_t cloc; }; static void shaderc_uninit(struct spirv_compiler *spirv) { struct priv *p = spirv->priv; shaderc_compile_options_release(p->opts); shaderc_compiler_release(p->compiler); freelocale(p->cloc); TA_FREEP(&spirv->priv); } static bool shaderc_init(struct spirv_compiler *spirv) { struct priv *p = spirv->priv = talloc_zero(spirv, struct priv); p->cloc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0); if (!p->cloc) { PL_FATAL(spirv, "Failed initializing C locale?!"); goto error; } p->compiler = shaderc_compiler_initialize(); if (!p->compiler) goto error; p->opts = shaderc_compile_options_initialize(); if (!p->opts) goto error; shaderc_compile_options_set_optimization_level(p->opts, shaderc_optimization_level_size); int ver, rev; shaderc_get_spv_version(&ver, &rev); spirv->compiler_version = ver * 100 + rev; spirv->glsl = (struct pl_glsl_desc) { .version = 450, // this is impossible to query, so hard-code it .vulkan = true, }; return true; error: shaderc_uninit(spirv); return false; } static shaderc_compilation_result_t compile(struct priv *p, enum glsl_shader_stage type, const char *glsl, bool debug) { static const shaderc_shader_kind kinds[] = { [GLSL_SHADER_VERTEX] = shaderc_glsl_vertex_shader, [GLSL_SHADER_FRAGMENT] = shaderc_glsl_fragment_shader, [GLSL_SHADER_COMPUTE] = shaderc_glsl_compute_shader, }; if (debug) { return shaderc_compile_into_spv_assembly(p->compiler, glsl, strlen(glsl), kinds[type], "input", "main", p->opts); } else { return shaderc_compile_into_spv(p->compiler, glsl, strlen(glsl), kinds[type], "input", "main", p->opts); } } static bool shaderc_compile(struct spirv_compiler *spirv, void *tactx, enum glsl_shader_stage type, const char *glsl, struct bstr *out_spirv) { struct priv *p = spirv->priv; // Switch to C locale to work around libshaderc bugs locale_t oldloc = uselocale((locale_t) 0); uselocale(p->cloc); shaderc_compilation_result_t res = compile(p, type, glsl, false); int errs = shaderc_result_get_num_errors(res), warn = shaderc_result_get_num_warnings(res); enum pl_log_level lev = errs ? PL_LOG_ERR : warn ? PL_LOG_INFO : PL_LOG_DEBUG; const char *msg = shaderc_result_get_error_message(res); if (msg[0]) PL_MSG(spirv, lev, "shaderc output:\n%s", msg); int s = shaderc_result_get_compilation_status(res); bool success = s == shaderc_compilation_status_success; static const char *results[] = { [shaderc_compilation_status_success] = "success", [shaderc_compilation_status_invalid_stage] = "invalid stage", [shaderc_compilation_status_compilation_error] = "error", [shaderc_compilation_status_internal_error] = "internal error", [shaderc_compilation_status_null_result_object] = "no result", [shaderc_compilation_status_invalid_assembly] = "invalid assembly", }; const char *status = s < PL_ARRAY_SIZE(results) ? results[s] : "unknown"; PL_MSG(spirv, lev, "shaderc compile status '%s' (%d errors, %d warnings)", status, errs, warn); if (success) { void *bytes = (void *) shaderc_result_get_bytes(res); out_spirv->len = shaderc_result_get_length(res); out_spirv->start = talloc_memdup(tactx, bytes, out_spirv->len); } // Also print SPIR-V disassembly for debugging purposes. Unfortunately // there doesn't seem to be a way to get this except compiling the shader // a second time.. 
if (pl_msg_test(spirv->ctx, PL_LOG_TRACE)) { shaderc_compilation_result_t dis = compile(p, type, glsl, true); PL_TRACE(spirv, "Generated SPIR-V:\n%.*s", (int) shaderc_result_get_length(dis), shaderc_result_get_bytes(dis)); shaderc_result_release(dis); } shaderc_result_release(res); uselocale(oldloc); return success; } const struct spirv_compiler_fns spirv_shaderc = { .name = "shaderc", .compile_glsl = shaderc_compile, .init = shaderc_init, .uninit = shaderc_uninit, }; libplacebo-0.4.0/src/swapchain.c000066400000000000000000000030101324021332500165040ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include "context.h" #include "swapchain.h" void pl_swapchain_destroy(const struct pl_swapchain **ptr) { const struct pl_swapchain *sw = *ptr; if (!sw) return; sw->impl->destroy(sw); *ptr = NULL; } int pl_swapchain_latency(const struct pl_swapchain *sw) { if (!sw->impl->latency) return 0; return sw->impl->latency(sw); } bool pl_swapchain_start_frame(const struct pl_swapchain *sw, struct pl_swapchain_frame *out_frame) { *out_frame = (struct pl_swapchain_frame) {0}; // sanity return sw->impl->start_frame(sw, out_frame); } bool pl_swapchain_submit_frame(const struct pl_swapchain *sw) { return sw->impl->submit_frame(sw); } void pl_swapchain_swap_buffers(const struct pl_swapchain *sw) { sw->impl->swap_buffers(sw); } libplacebo-0.4.0/src/swapchain.h000066400000000000000000000020511324021332500165150ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #define SW_PFN(name) __typeof__(pl_swapchain_##name) *name struct pl_sw_fns { // This destructor follows the same rules as `pl_gpu_fns` void (*destroy)(const struct pl_swapchain *sw); SW_PFN(latency); // optional SW_PFN(start_frame); SW_PFN(submit_frame); SW_PFN(swap_buffers); }; #undef SW_PFN libplacebo-0.4.0/src/ta/000077500000000000000000000000001324021332500147755ustar00rootroot00000000000000libplacebo-0.4.0/src/ta/README000066400000000000000000000017221324021332500156570ustar00rootroot00000000000000TA ("Tree Allocator") is a wrapper around malloc() and related functions, adding features like automatically freeing sub-trees of memory allocations if a parent allocation is freed. 
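A minimal usage sketch (illustrative only; the struct and field names here are
made up, but the calls are the ones declared in ta.h, and error handling is
omitted). The exact parent/child semantics are spelled out below:

    void *ctx = ta_new_context(NULL);             // root context

    struct item *it = ta_znew(ctx, struct item);  // zero-initialized child of ctx
    it->label = ta_strdup(it, "hello");           // child of it, grandchild of ctx

    ta_set_parent(it, NULL);                      // detach it from ctx
    ta_free(ctx);                                 // frees ctx; it survives, since detached
    ta_free(it);                                  // frees it together with it->label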
Generally, the idea is that every TA allocation can have a parent (indicated by the ta_parent argument in allocation function calls). If a parent is freed, its child allocations are automatically freed as well. It is also allowed to free a child before the parent, or to move a child to another parent with ta_set_parent(). It also provides a bunch of convenience macros and debugging facilities. The TA functions are documented in the implementation files (ta.c, ta_utils.c). Note: ----- This code was ported from the mpv project (https://mpv.io), and re-uses some of mpv's convenience wrappers (talloc.c/h), which wrap the underlying ta_* API calls provided by talloc (ta.c/h). Documentation for the underlying talloc API is here: http://talloc.samba.org/talloc/doc/html/modules.html libplacebo-0.4.0/src/ta/ta.c000066400000000000000000000327731324021332500155610ustar00rootroot00000000000000/* Copyright (C) 2017 the mpv developers * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #define TA_NO_WRAPPERS #include "ta.h" // Note: the actual minimum alignment is dictated by malloc(). It doesn't // make sense to set this value higher than malloc's alignment. #define MIN_ALIGN 16 #ifndef NDEBUG #define TA_MEMORY_DEBUGGING #endif struct ta_header { size_t size; // size of the user allocation struct ta_header *prev; // ring list containing siblings struct ta_header *next; struct ta_ext_header *ext; #ifdef TA_MEMORY_DEBUGGING unsigned int canary; struct ta_header *leak_next; struct ta_header *leak_prev; const char *name; #endif }; #define CANARY 0xD3ADB3EF union aligned_header { struct ta_header ta; // Make sure to satisfy typical alignment requirements void *align_ptr; int align_int; double align_d; long long align_ll; char align_min[(sizeof(struct ta_header) + MIN_ALIGN - 1) & ~(MIN_ALIGN - 1)]; }; #define PTR_TO_HEADER(ptr) (&((union aligned_header *)(ptr) - 1)->ta) #define PTR_FROM_HEADER(h) ((void *)((union aligned_header *)(h) + 1)) #define MAX_ALLOC (((size_t)-1) - sizeof(union aligned_header)) // Needed for non-leaf allocations, or extended features such as destructors. struct ta_ext_header { struct ta_header *header; // points back to normal header struct ta_header children; // list of children, with this as sentinel void (*destructor)(void *); }; // ta_ext_header.children.size is set to this #define CHILDREN_SENTINEL ((size_t)-1) static void ta_dbg_add(struct ta_header *h); static void ta_dbg_check_header(struct ta_header *h); static void ta_dbg_remove(struct ta_header *h); static struct ta_header *get_header(void *ptr) { struct ta_header *h = ptr ? 
PTR_TO_HEADER(ptr) : NULL; ta_dbg_check_header(h); return h; } static struct ta_ext_header *get_or_alloc_ext_header(void *ptr) { struct ta_header *h = get_header(ptr); if (!h) return NULL; if (!h->ext) { h->ext = malloc(sizeof(struct ta_ext_header)); if (!h->ext) return NULL; *h->ext = (struct ta_ext_header) { .header = h, .children = { .next = &h->ext->children, .prev = &h->ext->children, // Needed by ta_find_parent(): .size = CHILDREN_SENTINEL, .ext = h->ext, }, }; } return h->ext; } /* Set the parent allocation of ptr. If parent==NULL, remove the parent. * Setting parent==NULL (with ptr!=NULL) always succeeds, and unsets the * parent of ptr. Operations ptr==NULL always succeed and do nothing. * Returns true on success, false on OOM. * * Warning: if ta_parent is a direct or indirect child of ptr, things will go * wrong. The function will apparently succeed, but creates circular * parent links, which are not allowed. */ bool ta_set_parent(void *ptr, void *ta_parent) { struct ta_header *ch = get_header(ptr); if (!ch) return true; struct ta_ext_header *parent_eh = get_or_alloc_ext_header(ta_parent); if (ta_parent && !parent_eh) // do nothing on OOM return false; // Unlink from previous parent if (ch->next) { ch->next->prev = ch->prev; ch->prev->next = ch->next; ch->next = ch->prev = NULL; } // Link to new parent - insert at end of list (possibly orders destructors) if (parent_eh) { struct ta_header *children = &parent_eh->children; ch->next = children; ch->prev = children->prev; children->prev->next = ch; children->prev = ch; } return true; } /* Allocate size bytes of memory. If ta_parent is not NULL, this is used as * parent allocation (if ta_parent is freed, this allocation is automatically * freed as well). size==0 allocates a block of size 0 (i.e. returns non-NULL). * Returns NULL on OOM. */ void *ta_alloc_size(void *ta_parent, size_t size) { if (size >= MAX_ALLOC) return NULL; struct ta_header *h = malloc(sizeof(union aligned_header) + size); if (!h) return NULL; *h = (struct ta_header) {.size = size}; ta_dbg_add(h); void *ptr = PTR_FROM_HEADER(h); if (!ta_set_parent(ptr, ta_parent)) { ta_free(ptr); return NULL; } return ptr; } /* Exactly the same as ta_alloc_size(), but the returned memory block is * initialized to 0. */ void *ta_zalloc_size(void *ta_parent, size_t size) { if (size >= MAX_ALLOC) return NULL; struct ta_header *h = calloc(1, sizeof(union aligned_header) + size); if (!h) return NULL; *h = (struct ta_header) {.size = size}; ta_dbg_add(h); void *ptr = PTR_FROM_HEADER(h); if (!ta_set_parent(ptr, ta_parent)) { ta_free(ptr); return NULL; } return ptr; } /* Reallocate the allocation given by ptr and return a new pointer. Much like * realloc(), the returned pointer can be different, and on OOM, NULL is * returned. * * size==0 is equivalent to ta_free(ptr). * ptr==NULL is equivalent to ta_alloc_size(ta_parent, size). * * ta_parent is used only in the ptr==NULL case. * * Returns NULL if the operation failed. * NULL is also returned if size==0. */ void *ta_realloc_size(void *ta_parent, void *ptr, size_t size) { if (size >= MAX_ALLOC) return NULL; if (!size) { ta_free(ptr); return NULL; } if (!ptr) return ta_alloc_size(ta_parent, size); struct ta_header *h = get_header(ptr); struct ta_header *old_h = h; if (h->size == size) return ptr; ta_dbg_remove(h); h = realloc(h, sizeof(union aligned_header) + size); ta_dbg_add(h ? 
h : old_h); if (!h) return NULL; h->size = size; if (h != old_h) { if (h->next) { // Relink siblings h->next->prev = h; h->prev->next = h; } if (h->ext) { // Relink children h->ext->header = h; h->ext->children.next->prev = &h->ext->children; h->ext->children.prev->next = &h->ext->children; } } return PTR_FROM_HEADER(h); } /* Return the allocated size of ptr. This returns the size parameter of the * most recent ta_alloc.../ta_realloc... call. * If ptr==NULL, return 0. */ size_t ta_get_size(void *ptr) { struct ta_header *h = get_header(ptr); return h ? h->size : 0; } /* Free all allocations that (recursively) have ptr as parent allocation, but * do not free ptr itself. */ void ta_free_children(void *ptr) { struct ta_header *h = get_header(ptr); struct ta_ext_header *eh = h ? h->ext : NULL; if (!eh) return; while (eh->children.next != &eh->children) ta_free(PTR_FROM_HEADER(eh->children.next)); } /* Free the given allocation, and all of its direct and indirect children. */ void ta_free(void *ptr) { struct ta_header *h = get_header(ptr); if (!h) return; if (h->ext && h->ext->destructor) h->ext->destructor(ptr); ta_free_children(ptr); if (h->next) { // Unlink from sibling list h->next->prev = h->prev; h->prev->next = h->next; } ta_dbg_remove(h); free(h->ext); free(h); } /* Set a destructor that is to be called when the given allocation is freed. * (Whether the allocation is directly freed with ta_free() or indirectly by * freeing its parent does not matter.) There is only one destructor. If an * destructor was already set, it's overwritten. * * The destructor will be called with ptr as argument. The destructor can do * almost anything, but it must not attempt to free or realloc ptr. The * destructor is run before the allocation's children are freed (also, before * their destructors are run). * * Returns false if ptr==NULL, or on OOM. */ bool ta_set_destructor(void *ptr, void (*destructor)(void *)) { struct ta_ext_header *eh = get_or_alloc_ext_header(ptr); if (!eh) return false; eh->destructor = destructor; return true; } /* Return the ptr's parent allocation, or NULL if there isn't any. * * Warning: this has O(N) runtime complexity with N sibling allocations! 
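 * (The parent is not stored directly; this function walks the ring list of
 * siblings until it finds the parent's sentinel child header, which is why
 * the cost is linear in the number of siblings.)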
*/ void *ta_find_parent(void *ptr) { struct ta_header *h = get_header(ptr); if (!h || !h->next) return NULL; for (struct ta_header *cur = h->next; cur != h; cur = cur->next) { if (cur->size == CHILDREN_SENTINEL) return PTR_FROM_HEADER(cur->ext->header); } return NULL; } #ifdef TA_MEMORY_DEBUGGING #include static pthread_mutex_t ta_dbg_mutex = PTHREAD_MUTEX_INITIALIZER; static bool enable_leak_check; // pretty much constant static struct ta_header leak_node; static char allocation_is_string; static void ta_dbg_add(struct ta_header *h) { h->canary = CANARY; if (enable_leak_check) { pthread_mutex_lock(&ta_dbg_mutex); h->leak_next = &leak_node; h->leak_prev = leak_node.leak_prev; leak_node.leak_prev->leak_next = h; leak_node.leak_prev = h; pthread_mutex_unlock(&ta_dbg_mutex); } } static void ta_dbg_check_header(struct ta_header *h) { if (h) assert(h->canary == CANARY); } static void ta_dbg_remove(struct ta_header *h) { ta_dbg_check_header(h); if (h->leak_next) { // assume checking for !=NULL invariant ok without lock pthread_mutex_lock(&ta_dbg_mutex); h->leak_next->leak_prev = h->leak_prev; h->leak_prev->leak_next = h->leak_next; pthread_mutex_unlock(&ta_dbg_mutex); h->leak_next = h->leak_prev = NULL; } h->canary = 0; } static size_t get_children_size(struct ta_header *h) { size_t size = 0; if (h->ext) { struct ta_header *s; for (s = h->ext->children.next; s != &h->ext->children; s = s->next) size += s->size + get_children_size(s); } return size; } void ta_print_leak_report(void) { if (!enable_leak_check) return; pthread_mutex_lock(&ta_dbg_mutex); if (leak_node.leak_next && leak_node.leak_next != &leak_node) { size_t size = 0; size_t num_blocks = 0; fprintf(stderr, "Blocks not freed:\n"); fprintf(stderr, " %-20s %10s %10s %s\n", "Ptr", "Bytes", "C. Bytes", "Name"); while (leak_node.leak_next != &leak_node) { struct ta_header *cur = leak_node.leak_next; // Don't list those with parent; logically, only parents are listed if (!cur->next) { size_t c_size = get_children_size(cur); char name[256] = {0}; if (cur->name) snprintf(name, sizeof(name), "%s", cur->name); if (cur->name == &allocation_is_string) { snprintf(name, sizeof(name), "'%.*s'", (int)cur->size, (char *)PTR_FROM_HEADER(cur)); } for (int n = 0; n < sizeof(name); n++) { if (name[n] && name[n] < 0x20) name[n] = '.'; } fprintf(stderr, " %-20p %10zu %10zu %s\n", cur, cur->size, c_size, name); } size += cur->size; num_blocks += 1; // Unlink, and don't confuse valgrind by leaving live pointers. cur->leak_next->leak_prev = cur->leak_prev; cur->leak_prev->leak_next = cur->leak_next; cur->leak_next = cur->leak_prev = NULL; } fprintf(stderr, "%zu bytes in %zu blocks.\n", size, num_blocks); } pthread_mutex_unlock(&ta_dbg_mutex); } void ta_enable_leak_report(void) { pthread_mutex_lock(&ta_dbg_mutex); enable_leak_check = true; if (!leak_node.leak_prev && !leak_node.leak_next) { leak_node.leak_prev = &leak_node; leak_node.leak_next = &leak_node; } pthread_mutex_unlock(&ta_dbg_mutex); } /* Set a (static) string that will be printed if the memory allocation in ptr * shows up on the leak report. The string must stay valid until ptr is freed. * Calling it on ptr==NULL does nothing. * Typically used to set location info. * Always returns ptr (useful for chaining function calls). */ void *ta_dbg_set_loc(void *ptr, const char *loc) { struct ta_header *h = get_header(ptr); if (h) h->name = loc; return ptr; } /* Mark the allocation as string. The leak report will print it literally. 
*/ void *ta_dbg_mark_as_string(void *ptr) { // Specially handled by leak report code. return ta_dbg_set_loc(ptr, &allocation_is_string); } #else static void ta_dbg_add(struct ta_header *h){} static void ta_dbg_check_header(struct ta_header *h){} static void ta_dbg_remove(struct ta_header *h){} void ta_print_leak_report(void){} void ta_enable_leak_report(void){} void *ta_dbg_set_loc(void *ptr, const char *loc){return ptr;} void *ta_dbg_mark_as_string(void *ptr){return ptr;} #endif libplacebo-0.4.0/src/ta/ta.h000066400000000000000000000166531324021332500155650ustar00rootroot00000000000000/* Copyright (C) 2017 the mpv developers * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifndef TA_H_ #define TA_H_ #include #include #include #ifdef __GNUC__ #define TA_PRF(a1, a2) __attribute__ ((format(printf, a1, a2))) #define TA_TYPEOF(t) __typeof__(t) #else #define TA_PRF(a1, a2) #define TA_TYPEOF(t) void * #endif // Broken crap with __USE_MINGW_ANSI_STDIO #if defined(__MINGW32__) && defined(__GNUC__) && !defined(__clang__) #undef TA_PRF #define TA_PRF(a1, a2) __attribute__ ((format (gnu_printf, a1, a2))) #endif #define TA_STRINGIFY_(x) # x #define TA_STRINGIFY(x) TA_STRINGIFY_(x) #ifdef NDEBUG #define TA_LOC "" #else #define TA_LOC __FILE__ ":" TA_STRINGIFY(__LINE__) #endif // Core functions void *ta_alloc_size(void *ta_parent, size_t size); void *ta_zalloc_size(void *ta_parent, size_t size); void *ta_realloc_size(void *ta_parent, void *ptr, size_t size); size_t ta_get_size(void *ptr); void ta_free(void *ptr); void ta_free_children(void *ptr); bool ta_set_destructor(void *ptr, void (*destructor)(void *)); bool ta_set_parent(void *ptr, void *ta_parent); void *ta_find_parent(void *ptr); // Utility functions size_t ta_calc_array_size(size_t element_size, size_t count); size_t ta_calc_prealloc_elems(size_t nextidx); void *ta_new_context(void *ta_parent); void *ta_steal_(void *ta_parent, void *ptr); void *ta_memdup(void *ta_parent, const void *ptr, size_t size); char *ta_strdup(void *ta_parent, const char *str); bool ta_strdup_append(char **str, const char *a); bool ta_strdup_append_buffer(char **str, const char *a); char *ta_strndup(void *ta_parent, const char *str, size_t n); bool ta_strndup_append(char **str, const char *a, size_t n); bool ta_strndup_append_buffer(char **str, const char *a, size_t n); char *ta_asprintf(void *ta_parent, const char *fmt, ...) TA_PRF(2, 3); char *ta_vasprintf(void *ta_parent, const char *fmt, va_list ap) TA_PRF(2, 0); bool ta_asprintf_append(char **str, const char *fmt, ...) TA_PRF(2, 3); bool ta_vasprintf_append(char **str, const char *fmt, va_list ap) TA_PRF(2, 0); bool ta_asprintf_append_buffer(char **str, const char *fmt, ...) 
TA_PRF(2, 3); bool ta_vasprintf_append_buffer(char **str, const char *fmt, va_list ap) TA_PRF(2, 0); #define ta_new(ta_parent, type) (type *)ta_alloc_size(ta_parent, sizeof(type)) #define ta_znew(ta_parent, type) (type *)ta_zalloc_size(ta_parent, sizeof(type)) #define ta_new_array(ta_parent, type, count) \ (type *)ta_alloc_size(ta_parent, ta_calc_array_size(sizeof(type), count)) #define ta_znew_array(ta_parent, type, count) \ (type *)ta_zalloc_size(ta_parent, ta_calc_array_size(sizeof(type), count)) #define ta_new_array_size(ta_parent, element_size, count) \ ta_alloc_size(ta_parent, ta_calc_array_size(element_size, count)) #define ta_realloc(ta_parent, ptr, type, count) \ (type *)ta_realloc_size(ta_parent, ptr, ta_calc_array_size(sizeof(type), count)) #define ta_new_ptrtype(ta_parent, ptr) \ (TA_TYPEOF(ptr))ta_alloc_size(ta_parent, sizeof(*ptr)) #define ta_new_array_ptrtype(ta_parent, ptr, count) \ (TA_TYPEOF(ptr))ta_new_array_size(ta_parent, sizeof(*(ptr)), count) #define ta_steal(ta_parent, ptr) (TA_TYPEOF(ptr))ta_steal_(ta_parent, ptr) #define ta_dup_ptrtype(ta_parent, ptr) \ (TA_TYPEOF(ptr))ta_memdup(ta_parent, (void*) (ptr), sizeof(*(ptr))) // Ugly macros that crash on OOM. // All of these mirror real functions (with a 'x' added after the 'ta_' // prefix), and the only difference is that they will call abort() on allocation // failures (such as out of memory conditions), instead of returning an error // code. #define ta_xalloc_size(...) ta_oom_p(ta_alloc_size(__VA_ARGS__)) #define ta_xzalloc_size(...) ta_oom_p(ta_zalloc_size(__VA_ARGS__)) #define ta_xset_destructor(...) ta_oom_b(ta_set_destructor(__VA_ARGS__)) #define ta_xset_parent(...) ta_oom_b(ta_set_parent(__VA_ARGS__)) #define ta_xnew_context(...) ta_oom_p(ta_new_context(__VA_ARGS__)) #define ta_xstrdup_append(...) ta_oom_b(ta_strdup_append(__VA_ARGS__)) #define ta_xstrdup_append_buffer(...) ta_oom_b(ta_strdup_append_buffer(__VA_ARGS__)) #define ta_xstrndup_append(...) ta_oom_b(ta_strndup_append(__VA_ARGS__)) #define ta_xstrndup_append_buffer(...) ta_oom_b(ta_strndup_append_buffer(__VA_ARGS__)) #define ta_xasprintf(...) ta_oom_s(ta_asprintf(__VA_ARGS__)) #define ta_xvasprintf(...) ta_oom_s(ta_vasprintf(__VA_ARGS__)) #define ta_xasprintf_append(...) ta_oom_b(ta_asprintf_append(__VA_ARGS__)) #define ta_xvasprintf_append(...) ta_oom_b(ta_vasprintf_append(__VA_ARGS__)) #define ta_xasprintf_append_buffer(...) ta_oom_b(ta_asprintf_append_buffer(__VA_ARGS__)) #define ta_xvasprintf_append_buffer(...) ta_oom_b(ta_vasprintf_append_buffer(__VA_ARGS__)) #define ta_xnew(...) ta_oom_g(ta_new(__VA_ARGS__)) #define ta_xznew(...) ta_oom_g(ta_znew(__VA_ARGS__)) #define ta_xnew_array(...) ta_oom_g(ta_new_array(__VA_ARGS__)) #define ta_xznew_array(...) ta_oom_g(ta_znew_array(__VA_ARGS__)) #define ta_xnew_array_size(...) ta_oom_p(ta_new_array_size(__VA_ARGS__)) #define ta_xnew_ptrtype(...) ta_oom_g(ta_new_ptrtype(__VA_ARGS__)) #define ta_xnew_array_ptrtype(...) ta_oom_g(ta_new_array_ptrtype(__VA_ARGS__)) #define ta_xdup_ptrtype(...) ta_oom_g(ta_dup_ptrtype(__VA_ARGS__)) #define ta_xsteal(ta_parent, ptr) (TA_TYPEOF(ptr))ta_xsteal_(ta_parent, ptr) #define ta_xrealloc(ta_parent, ptr, type, count) \ (type *)ta_xrealloc_size(ta_parent, ptr, ta_calc_array_size(sizeof(type), count)) // Can't be macros, because the OOM logic is slightly less trivial. 
char *ta_xstrdup(void *ta_parent, const char *str); char *ta_xstrndup(void *ta_parent, const char *str, size_t n); void *ta_xsteal_(void *ta_parent, void *ptr); void *ta_xmemdup(void *ta_parent, const void *ptr, size_t size); void *ta_xrealloc_size(void *ta_parent, void *ptr, size_t size); #ifndef TA_NO_WRAPPERS #define ta_alloc_size(...) ta_dbg_set_loc(ta_alloc_size(__VA_ARGS__), TA_LOC) #define ta_zalloc_size(...) ta_dbg_set_loc(ta_zalloc_size(__VA_ARGS__), TA_LOC) #define ta_realloc_size(...) ta_dbg_set_loc(ta_realloc_size(__VA_ARGS__), TA_LOC) #define ta_memdup(...) ta_dbg_set_loc(ta_memdup(__VA_ARGS__), TA_LOC) #define ta_xmemdup(...) ta_dbg_set_loc(ta_xmemdup(__VA_ARGS__), TA_LOC) #define ta_xrealloc_size(...) ta_dbg_set_loc(ta_xrealloc_size(__VA_ARGS__), TA_LOC) #endif void ta_oom_b(bool b); char *ta_oom_s(char *s); void *ta_oom_p(void *p); // Generic pointer #define ta_oom_g(ptr) (TA_TYPEOF(ptr))ta_oom_p((void*) ptr) void ta_enable_leak_report(void); void ta_print_leak_report(void); // no-op when disabled void *ta_dbg_set_loc(void *ptr, const char *name); void *ta_dbg_mark_as_string(void *ptr); #endif libplacebo-0.4.0/src/ta/ta_utils.c000066400000000000000000000216631324021332500167750ustar00rootroot00000000000000/* Copyright (C) 2017 the mpv developers * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #define TA_NO_WRAPPERS #include "ta.h" // Return element_size * count. If it overflows, return (size_t)-1 (SIZE_MAX). // I.e. this returns the equivalent of: MIN(element_size * count, SIZE_MAX). // The idea is that every real memory allocator will reject (size_t)-1, thus // this is a valid way to handle too large array allocation requests. size_t ta_calc_array_size(size_t element_size, size_t count) { if (count > (((size_t)-1) / element_size)) return (size_t)-1; return element_size * count; } // This is used when an array has to be enlarged for appending new elements. // Return a "good" size for the new array (in number of elements). This returns // a value > nextidx, unless the calculation overflows, in which case SIZE_MAX // is returned. size_t ta_calc_prealloc_elems(size_t nextidx) { if (nextidx >= ((size_t)-1) / 2 - 1) return (size_t)-1; return (nextidx + 1) * 2; } static void dummy_dtor(void *p){} /* Create an empty (size 0) TA allocation, which is prepared in a way such that * using it as parent with ta_set_parent() always succeed. Calling * ta_set_destructor() on it will always succeed as well. */ void *ta_new_context(void *ta_parent) { void *new = ta_alloc_size(ta_parent, 0); // Force it to allocate an extended header. if (!ta_set_destructor(new, dummy_dtor)) { ta_free(new); new = NULL; } return new; } /* Set parent of ptr to ta_parent, return the ptr. * Note that ta_parent==NULL will simply unset the current parent of ptr. * If the operation fails (on OOM), return NULL. 
(That's pretty bad behavior, * but the only way to signal failure.) */ void *ta_steal_(void *ta_parent, void *ptr) { if (!ta_set_parent(ptr, ta_parent)) return NULL; return ptr; } /* Duplicate the memory at ptr with the given size. */ void *ta_memdup(void *ta_parent, const void *ptr, size_t size) { if (!ptr) { assert(!size); return NULL; } void *res = ta_alloc_size(ta_parent, size); if (!res) return NULL; memcpy(res, ptr, size); return res; } // *str = *str[0..at] + append[0..append_len] // (append_len being a maximum length; shorter if embedded \0s are encountered) static bool strndup_append_at(char **str, size_t at, const char *append, size_t append_len) { assert(ta_get_size(*str) >= at); if (!*str && !append) return true; // stays NULL, but not an OOM condition size_t real_len = append ? strnlen(append, append_len) : 0; if (append_len > real_len) append_len = real_len; if (ta_get_size(*str) < at + append_len + 1) { char *t = ta_realloc_size(NULL, *str, at + append_len + 1); if (!t) return false; *str = t; } if (append_len) memcpy(*str + at, append, append_len); (*str)[at + append_len] = '\0'; ta_dbg_mark_as_string(*str); return true; } /* Return a copy of str. * Returns NULL on OOM. */ char *ta_strdup(void *ta_parent, const char *str) { return ta_strndup(ta_parent, str, str ? strlen(str) : 0); } /* Return a copy of str. If the string is longer than n, copy only n characters * (the returned allocation will be n+1 bytes and contain a terminating '\0'). * The returned string will have the length MIN(strlen(str), n) * If str==NULL, return NULL. Returns NULL on OOM as well. */ char *ta_strndup(void *ta_parent, const char *str, size_t n) { if (!str) return NULL; char *new = NULL; strndup_append_at(&new, 0, str, n); if (!ta_set_parent(new, ta_parent)) { ta_free(new); new = NULL; } return new; } /* Append a to *str. If *str is NULL, the string is newly allocated, otherwise * ta_realloc() is used on *str as needed. * Return success or failure (it can fail due to OOM only). */ bool ta_strdup_append(char **str, const char *a) { return strndup_append_at(str, *str ? strlen(*str) : 0, a, (size_t)-1); } /* Like ta_strdup_append(), but use ta_get_size(*str)-1 instead of strlen(*str). * (See also: ta_asprintf_append_buffer()) */ bool ta_strdup_append_buffer(char **str, const char *a) { size_t size = ta_get_size(*str); if (size > 0) size -= 1; return strndup_append_at(str, size, a, (size_t)-1); } /* Like ta_strdup_append(), but limit the length of a with n. * (See also: ta_strndup()) */ bool ta_strndup_append(char **str, const char *a, size_t n) { return strndup_append_at(str, *str ? strlen(*str) : 0, a, n); } /* Like ta_strdup_append_buffer(), but limit the length of a with n. * (See also: ta_strndup()) */ bool ta_strndup_append_buffer(char **str, const char *a, size_t n) { size_t size = ta_get_size(*str); if (size > 0) size -= 1; return strndup_append_at(str, size, a, n); } static bool ta_vasprintf_append_at(char **str, size_t at, const char *fmt, va_list ap) { assert(ta_get_size(*str) >= at); int size; va_list copy; va_copy(copy, ap); char c; size = vsnprintf(&c, 1, fmt, copy); va_end(copy); if (size < 0) return false; if (ta_get_size(*str) < at + size + 1) { char *t = ta_realloc_size(NULL, *str, at + size + 1); if (!t) return false; *str = t; } vsnprintf(*str + at, size + 1, fmt, ap); ta_dbg_mark_as_string(*str); return true; } /* Like snprintf(); returns the formatted string as allocation (or NULL on OOM * or snprintf() errors). */ char *ta_asprintf(void *ta_parent, const char *fmt, ...) 
{ char *res; va_list ap; va_start(ap, fmt); res = ta_vasprintf(ta_parent, fmt, ap); va_end(ap); return res; } char *ta_vasprintf(void *ta_parent, const char *fmt, va_list ap) { char *res = NULL; ta_vasprintf_append_at(&res, 0, fmt, ap); if (!res || !ta_set_parent(res, ta_parent)) { ta_free(res); return NULL; } return res; } /* Append the formatted string to *str (after strlen(*str)). The allocation is * ta_realloced if needed. * Returns false on OOM or snprintf() errors, with *str left untouched. */ bool ta_asprintf_append(char **str, const char *fmt, ...) { bool res; va_list ap; va_start(ap, fmt); res = ta_vasprintf_append(str, fmt, ap); va_end(ap); return res; } bool ta_vasprintf_append(char **str, const char *fmt, va_list ap) { return ta_vasprintf_append_at(str, *str ? strlen(*str) : 0, fmt, ap); } /* Append the formatted string at the end of the allocation of *str. It * overwrites the last byte of the allocation too (which is assumed to be the * '\0' terminating the string). Compared to ta_asprintf_append(), this is * useful if you know that the string ends with the allocation, so that the * extra strlen() can be avoided for better performance. * Returns false on OOM or snprintf() errors, with *str left untouched. */ bool ta_asprintf_append_buffer(char **str, const char *fmt, ...) { bool res; va_list ap; va_start(ap, fmt); res = ta_vasprintf_append_buffer(str, fmt, ap); va_end(ap); return res; } bool ta_vasprintf_append_buffer(char **str, const char *fmt, va_list ap) { size_t size = ta_get_size(*str); if (size > 0) size -= 1; return ta_vasprintf_append_at(str, size, fmt, ap); } void *ta_oom_p(void *p) { if (!p) abort(); return p; } void ta_oom_b(bool b) { if (!b) abort(); } char *ta_oom_s(char *s) { if (!s) abort(); return s; } void *ta_xsteal_(void *ta_parent, void *ptr) { ta_oom_b(ta_set_parent(ptr, ta_parent)); return ptr; } void *ta_xmemdup(void *ta_parent, const void *ptr, size_t size) { void *new = ta_memdup(ta_parent, ptr, size); ta_oom_b(new || !ptr); return new; } void *ta_xrealloc_size(void *ta_parent, void *ptr, size_t size) { ptr = ta_realloc_size(ta_parent, ptr, size); ta_oom_b(ptr || !size); return ptr; } char *ta_xstrdup(void *ta_parent, const char *str) { char *res = ta_strdup(ta_parent, str); ta_oom_b(res || !str); return res; } char *ta_xstrndup(void *ta_parent, const char *str, size_t n) { char *res = ta_strndup(ta_parent, str, n); ta_oom_b(res || !str); return res; } libplacebo-0.4.0/src/ta/talloc.c000066400000000000000000000065021324021332500164220ustar00rootroot00000000000000/* Copyright (C) 2017 the mpv developers * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include #include #include #include #include "talloc.h" char *ta_talloc_strdup_append(char *s, const char *a) { ta_xstrdup_append(&s, a); return s; } char *ta_talloc_strdup_append_buffer(char *s, const char *a) { ta_xstrdup_append_buffer(&s, a); return s; } char *ta_talloc_strndup_append(char *s, const char *a, size_t n) { ta_xstrndup_append(&s, a, n); return s; } char *ta_talloc_strndup_append_buffer(char *s, const char *a, size_t n) { ta_xstrndup_append_buffer(&s, a, n); return s; } char *ta_talloc_vasprintf_append(char *s, const char *fmt, va_list ap) { ta_xvasprintf_append(&s, fmt, ap); return s; } char *ta_talloc_vasprintf_append_buffer(char *s, const char *fmt, va_list ap) { ta_xvasprintf_append_buffer(&s, fmt, ap); return s; } char *ta_talloc_asprintf_append(char *s, const char *fmt, ...) { char *res; va_list ap; va_start(ap, fmt); res = talloc_vasprintf_append(s, fmt, ap); va_end(ap); return res; } char *ta_talloc_asprintf_append_buffer(char *s, const char *fmt, ...) { char *res; va_list ap; va_start(ap, fmt); res = talloc_vasprintf_append_buffer(s, fmt, ap); va_end(ap); return res; } struct ta_ref { pthread_mutex_t lock; int refcount; }; struct ta_ref *ta_ref_new(void *t) { struct ta_ref *ref = ta_znew(t, struct ta_ref); if (!ref) return NULL; *ref = (struct ta_ref) { .lock = PTHREAD_MUTEX_INITIALIZER, .refcount = 1, }; return ref; } struct ta_ref *ta_ref_dup(struct ta_ref *ref) { if (!ref) return NULL; pthread_mutex_lock(&ref->lock); ref->refcount++; pthread_mutex_unlock(&ref->lock); return ref; } void ta_ref_deref(struct ta_ref **refp) { struct ta_ref *ref = *refp; if (!ref) return; pthread_mutex_lock(&ref->lock); if (--ref->refcount > 0) { pthread_mutex_unlock(&ref->lock); return; } pthread_mutex_destroy(&ref->lock); ta_free(ref); *refp = NULL; } // Indirection object, used to associate the destructor with a ta_ref_deref struct ta_ref_indirect { struct ta_ref *ref; }; static void ta_ref_indir_dtor(void *p) { struct ta_ref_indirect *indir = p; ta_ref_deref(&indir->ref); } bool ta_ref_attach(void *t, struct ta_ref *ref) { if (!ref) return true; struct ta_ref_indirect *indir = ta_new_ptrtype(t, indir); if (!indir) return false; indir->ref = ta_ref_dup(ref); ta_set_destructor(indir, ta_ref_indir_dtor); return true; } libplacebo-0.4.0/src/ta/talloc.h000066400000000000000000000164141324021332500164320ustar00rootroot00000000000000/* Copyright (C) 2017 the mpv developers * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifndef TA_TALLOC_H_ #define TA_TALLOC_H_ #include #include "ta.h" // Note: all talloc wrappers are wired to the "x" functions, which abort on OOM. // libtalloc doesn't do that, but the mplayer2/mpv internal copies of it did. 
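//
// Illustrative sketch (not part of the original header; all identifiers in it
// are made up): because these wrappers abort on allocation failure, call
// sites do not need to check for NULL:
//
//     struct pass *p = talloc_zero(ctx, struct pass);   // never returns NULL
//     p->label = talloc_asprintf(p, "pass %d", index);  // freed along with p
//     TARRAY_APPEND(ctx, passes, num_passes, p);        // grow dynamic array
//     talloc_free(ctx);                                 // frees p and p->label too
//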
#define talloc ta_xnew #define talloc_zero ta_xznew #define talloc_array ta_xnew_array #define talloc_zero_array ta_xznew_array #define talloc_array_size ta_xnew_array_size #define talloc_realloc ta_xrealloc #define talloc_ptrtype ta_xnew_ptrtype #define talloc_array_ptrtype ta_xnew_array_ptrtype #define talloc_steal ta_xsteal #define talloc_realloc_size ta_xrealloc_size #define talloc_new ta_xnew_context #define talloc_set_destructor ta_xset_destructor #define talloc_parent ta_find_parent #define talloc_enable_leak_report ta_enable_leak_report #define talloc_print_leak_report ta_print_leak_report #define talloc_size ta_xalloc_size #define talloc_zero_size ta_xzalloc_size #define talloc_get_size ta_get_size #define talloc_free_children ta_free_children #define talloc_free ta_free #define talloc_memdup ta_xmemdup #define talloc_strdup ta_xstrdup #define talloc_strndup ta_xstrndup #define talloc_ptrdup ta_xdup_ptrtype #define talloc_asprintf ta_xasprintf #define talloc_vasprintf ta_xvasprintf // Don't define linker-level symbols, as that would clash with real libtalloc. #define talloc_strdup_append ta_talloc_strdup_append #define talloc_strdup_append_buffer ta_talloc_strdup_append_buffer #define talloc_strndup_append ta_talloc_strndup_append #define talloc_strndup_append_buffer ta_talloc_strndup_append_buffer #define talloc_vasprintf_append ta_talloc_vasprintf_append #define talloc_vasprintf_append_buffer ta_talloc_vasprintf_append_buffer #define talloc_asprintf_append ta_talloc_asprintf_append #define talloc_asprintf_append_buffer ta_talloc_asprintf_append_buffer char *ta_talloc_strdup(void *t, const char *p); char *ta_talloc_strdup_append(char *s, const char *a); char *ta_talloc_strdup_append_buffer(char *s, const char *a); char *ta_talloc_strndup(void *t, const char *p, size_t n); char *ta_talloc_strndup_append(char *s, const char *a, size_t n); char *ta_talloc_strndup_append_buffer(char *s, const char *a, size_t n); char *ta_talloc_vasprintf_append(char *s, const char *fmt, va_list ap) TA_PRF(2, 0); char *ta_talloc_vasprintf_append_buffer(char *s, const char *fmt, va_list ap) TA_PRF(2, 0); char *ta_talloc_asprintf_append(char *s, const char *fmt, ...) TA_PRF(2, 3); char *ta_talloc_asprintf_append_buffer(char *s, const char *fmt, ...) TA_PRF(2, 3); // Talloc refcounting struct ta_ref; // ta_ref_deref will free the ref and all of its children as soon as the // internal refcount reaches 0 struct ta_ref *ta_ref_new(void *t); struct ta_ref *ta_ref_dup(struct ta_ref *ref); void ta_ref_deref(struct ta_ref **ref); // Attaches a reference as a child of another talloc ctx, such that freeing // `t` is like dereferencing the ta_ref. bool ta_ref_attach(void *t, struct ta_ref *ref); #define talloc_ref_new(...) ta_oom_p(ta_ref_new(__VA_ARGS__)) #define talloc_ref_dup(...) ta_oom_p(ta_ref_dup(__VA_ARGS__)) #define talloc_ref_deref(...) ta_ref_deref(__VA_ARGS__) #define talloc_ref_attach(...) ta_oom_b(ta_ref_attach(__VA_ARGS__)) // Utility functions (ported from mpv) #define TA_FREEP(pctx) do {talloc_free(*(pctx)); *(pctx) = NULL;} while(0) #define TA_EXPAND_ARGS(...) __VA_ARGS__ #define TALLOC_AVAIL(p) (talloc_get_size(p) / sizeof((p)[0])) #define TARRAY_RESIZE(ctx, p, count) \ do { \ (p) = ta_xrealloc_size(ctx, p, \ ta_calc_array_size(sizeof((p)[0]), count)); \ } while (0) #define TARRAY_GROW(ctx, p, nextidx) \ do { \ size_t nextidx_ = (nextidx); \ if (nextidx_ >= TALLOC_AVAIL(p)) \ TARRAY_RESIZE(ctx, p, ta_calc_prealloc_elems(nextidx_)); \ } while (0) #define TARRAY_APPEND(ctx, p, idxvar, ...) 
\ do { \ TARRAY_GROW(ctx, p, idxvar); \ (p)[(idxvar)] = (TA_EXPAND_ARGS(__VA_ARGS__));\ (idxvar)++; \ } while (0) #define TARRAY_INSERT_AT(ctx, p, idxvar, at, ...) \ do { \ size_t at_ = (at); \ assert(at_ <= (idxvar)); \ TARRAY_GROW(ctx, p, idxvar); \ memmove((p) + at_ + 1, (p) + at_, \ ((idxvar) - at_) * sizeof((p)[0])); \ (idxvar)++; \ (p)[at_] = (TA_EXPAND_ARGS(__VA_ARGS__)); \ } while (0) // Appends all of `op` to `p` #define TARRAY_CONCAT(ctx, p, idxvar, op, oidxvar) \ do { \ TARRAY_GROW(ctx, p, (idxvar) + (oidxvar)); \ memmove((p) + (idxvar), (op), \ (oidxvar) * sizeof((op)[0])); \ (idxvar) += (oidxvar); \ } while (0) // Doesn't actually free any memory, or do any other talloc calls. #define TARRAY_REMOVE_AT(p, idxvar, at) \ do { \ size_t at_ = (at); \ assert(at_ <= (idxvar)); \ memmove((p) + at_, (p) + at_ + 1, \ ((idxvar) - at_ - 1) * sizeof((p)[0])); \ (idxvar)--; \ } while (0) // Returns whether or not there was any element to pop. #define TARRAY_POP(p, idxvar, out) \ ((idxvar) > 0 \ ? (*(out) = (p)[--(idxvar)], true) \ : false \ ) #define TARRAY_DUP(ctx, p, count) \ talloc_memdup(ctx, p, (count) * sizeof((p)[0])) #define talloc_struct(ctx, type, ...) \ talloc_memdup(ctx, &(type) TA_EXPAND_ARGS(__VA_ARGS__), sizeof(type)) #endif libplacebo-0.4.0/src/tests/000077500000000000000000000000001324021332500155335ustar00rootroot00000000000000libplacebo-0.4.0/src/tests/bench.c000066400000000000000000000265441324021332500167710ustar00rootroot00000000000000#include "tests.h" #include "time.h" #define TEX_SIZE 2048 #define CUBE_SIZE 64 #define NUM_FBOS 10 #define BENCH_DUR 3 static const struct pl_tex *create_test_img(const struct pl_gpu *gpu) { const struct pl_fmt *fmt; fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, PL_FMT_CAP_LINEAR); REQUIRE(fmt); int cube_stride = TEX_SIZE / CUBE_SIZE; int cube_count = cube_stride * cube_stride; assert(cube_count * CUBE_SIZE * CUBE_SIZE == TEX_SIZE * TEX_SIZE); float *data = malloc(TEX_SIZE * TEX_SIZE * sizeof(float[4])); for (int n = 0; n < cube_count; n++) { int xbase = (n % cube_stride) * CUBE_SIZE; int ybase = (n / cube_stride) * CUBE_SIZE; for (int g = 0; g < CUBE_SIZE; g++) { for (int r = 0; r < CUBE_SIZE; r++) { int xpos = xbase + r; int ypos = ybase + g; assert(xpos < TEX_SIZE && ypos < TEX_SIZE); float *color = &data[(ypos * TEX_SIZE + xpos) * 4]; color[0] = (float) r / CUBE_SIZE; color[1] = (float) g / CUBE_SIZE; color[2] = (float) n / cube_count; color[3] = 1.0; } } } const struct pl_tex *tex = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fmt, .w = TEX_SIZE, .h = TEX_SIZE, .sampleable = true, .sample_mode = PL_TEX_SAMPLE_LINEAR, .initial_data = data, }); free(data); REQUIRE(tex); return tex; } struct fbo { const struct pl_buf *buf; const struct pl_tex *tex; }; static void create_fbos(const struct pl_gpu *gpu, struct fbo fbos[NUM_FBOS]) { const struct pl_fmt *fmt; fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 0, PL_FMT_CAP_RENDERABLE); REQUIRE(fmt); for (int i = 0; i < NUM_FBOS; i++) { fbos[i].tex = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fmt, .w = TEX_SIZE, .h = TEX_SIZE, .renderable = true, .host_readable = true, .storable = !!(fmt->caps & PL_FMT_CAP_STORABLE), }); REQUIRE(fbos[i].tex); fbos[i].buf = pl_buf_create(gpu, &(struct pl_buf_params) { .type = PL_BUF_TEX_TRANSFER, .size = fmt->texel_size, .host_readable = true, }); REQUIRE(fbos[i].buf); } } typedef void (*bench_fn)(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src); static void run_bench(const struct pl_gpu *gpu, struct pl_dispatch 
*dp, struct pl_shader_obj **state, const struct pl_tex *src, struct fbo fbo, bench_fn bench) { // Hard block until the FBO is free while (pl_buf_poll(gpu, fbo.buf, 1000000)); // 1 ms pl_dispatch_reset_frame(dp); struct pl_shader *sh = pl_dispatch_begin(dp); bench(sh, state, src); pl_dispatch_finish(dp, &sh, fbo.tex, NULL, NULL); bool ok = pl_tex_download(gpu, &(struct pl_tex_transfer_params) { .tex = fbo.tex, .buf = fbo.buf, // Transfer a single pixel: .stride_w = 1, .rc = { 0, 0, 0, 1, 1, 1 }, }); REQUIRE(ok); } static void benchmark(const struct pl_gpu *gpu, const char *name, bench_fn bench) { struct pl_dispatch *dp = pl_dispatch_create(gpu->ctx, gpu); struct pl_shader_obj *state = NULL; const struct pl_tex *src = create_test_img(gpu); struct fbo fbos[NUM_FBOS] = {0}; create_fbos(gpu, fbos); // Run the benchmark and flush+block once to force shader compilation etc. run_bench(gpu, dp, &state, src, fbos[0], bench); pl_gpu_flush(gpu); while (pl_buf_poll(gpu, fbos[0].buf, 1000000000)); // 1 s // Perform the actual benchmark clock_t start = clock(), stop = {0}; unsigned long frames = 0; int index = 0; do { frames++; run_bench(gpu, dp, &state, src, fbos[index++], bench); index %= NUM_FBOS; stop = clock(); } while (stop - start < BENCH_DUR * CLOCKS_PER_SEC); float secs = (float) (stop - start) / CLOCKS_PER_SEC; printf("'%s':\t%4lu frames in %1.6f seconds => %2.6f ms/frame (%5.2f FPS)\n", name, frames, secs, 1000 * secs / frames, frames / secs); pl_shader_obj_destroy(&state); pl_dispatch_destroy(&dp); pl_tex_destroy(gpu, &src); for (int i = 0; i < NUM_FBOS; i++) { pl_tex_destroy(gpu, &fbos[i].tex); pl_buf_destroy(gpu, &fbos[i].buf); } } // List of benchmarks static void bench_bt2020c(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_color_repr repr = { .sys = PL_COLOR_SYSTEM_BT_2020_C, .levels = PL_COLOR_LEVELS_TV, }; pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); pl_shader_decode_color(sh, &repr, NULL); } static void bench_deband(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { pl_shader_deband(sh, src, NULL); } static void bench_deband_heavy(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { pl_shader_deband(sh, src, &(struct pl_deband_params) { .iterations = 4, .threshold = 4.0, .radius = 4.0, .grain = 16.0, }); } static void bench_bilinear(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); } static void bench_bicubic(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { pl_shader_sample_bicubic(sh, &(struct pl_sample_src) { .tex = src }); } static void bench_dither_blue(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_dither_params params = pl_dither_default_params; params.method = PL_DITHER_BLUE_NOISE; pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); pl_shader_dither(sh, 8, state, ¶ms); } static void bench_dither_white(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_dither_params params = pl_dither_default_params; params.method = PL_DITHER_WHITE_NOISE; pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); pl_shader_dither(sh, 8, state, ¶ms); } static void bench_dither_ordered_lut(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_dither_params params = pl_dither_default_params; params.method = 
PL_DITHER_ORDERED_LUT; pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); pl_shader_dither(sh, 8, state, ¶ms); } static void bench_dither_ordered_fix(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_dither_params params = pl_dither_default_params; params.method = PL_DITHER_ORDERED_FIXED; pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); pl_shader_dither(sh, 8, state, ¶ms); } static void bench_polar(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_sample_filter_params params = { .filter = pl_filter_ewa_lanczos, .lut = state, }; pl_shader_sample_polar(sh, &(struct pl_sample_src) { .tex = src }, ¶ms); } static void bench_polar_nocompute(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_sample_filter_params params = { .filter = pl_filter_ewa_lanczos, .no_compute = true, .lut = state, }; pl_shader_sample_polar(sh, &(struct pl_sample_src) { .tex = src }, ¶ms); } static void bench_hdr_hable(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_color_map_params params = { .tone_mapping_algo = PL_TONE_MAPPING_HABLE, }; pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); pl_shader_color_map(sh, ¶ms, pl_color_space_hdr10, pl_color_space_monitor, state, false); } static void bench_hdr_mobius(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_color_map_params params = { .tone_mapping_algo = PL_TONE_MAPPING_MOBIUS, }; pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); pl_shader_color_map(sh, ¶ms, pl_color_space_hdr10, pl_color_space_monitor, state, false); } static void bench_hdr_peak(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_color_map_params params = { .tone_mapping_algo = PL_TONE_MAPPING_CLIP, .peak_detect_frames = 10, }; pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); pl_shader_color_map(sh, ¶ms, pl_color_space_hdr10, pl_color_space_monitor, state, false); } static void bench_hdr_desat(struct pl_shader *sh, struct pl_shader_obj **state, const struct pl_tex *src) { struct pl_color_map_params params = { .tone_mapping_algo = PL_TONE_MAPPING_CLIP, .tone_mapping_desaturate = 1.0, }; pl_shader_sample_direct(sh, &(struct pl_sample_src) { .tex = src }); pl_shader_color_map(sh, ¶ms, pl_color_space_hdr10, pl_color_space_monitor, state, false); } int main() { setbuf(stdout, NULL); setbuf(stderr, NULL); struct pl_context *ctx; ctx = pl_context_create(PL_API_VER, &(struct pl_context_params) { .log_cb = isatty(fileno(stdout)) ? 
pl_log_color : pl_log_simple, .log_level = PL_LOG_WARN, }); const struct pl_vulkan *vk = pl_vulkan_create(ctx, NULL); if (!vk) return SKIP; printf("= Running benchmarks =\n"); benchmark(vk->gpu, "bilinear", bench_bilinear); benchmark(vk->gpu, "bicubic", bench_bicubic); benchmark(vk->gpu, "deband", bench_deband); benchmark(vk->gpu, "deband_heavy", bench_deband_heavy); // Dithering algorithms benchmark(vk->gpu, "dither_blue", bench_dither_blue); benchmark(vk->gpu, "dither_white", bench_dither_white); benchmark(vk->gpu, "dither_ordered_lut", bench_dither_ordered_lut); benchmark(vk->gpu, "dither_ordered_fixed", bench_dither_ordered_fix); // Polar sampling benchmark(vk->gpu, "polar", bench_polar); if (vk->gpu->caps & PL_GPU_CAP_COMPUTE) benchmark(vk->gpu, "polar_nocompute", bench_polar_nocompute); // HDR tone mapping benchmark(vk->gpu, "hdr_hable", bench_hdr_hable); benchmark(vk->gpu, "hdr_mobius", bench_hdr_mobius); benchmark(vk->gpu, "hdr_desaturate", bench_hdr_desat); if (vk->gpu->caps & PL_GPU_CAP_COMPUTE) benchmark(vk->gpu, "hdr_peakdetect", bench_hdr_peak); // Misc stuff benchmark(vk->gpu, "bt2020c", bench_bt2020c); return 0; } libplacebo-0.4.0/src/tests/colorspace.c000066400000000000000000000145541324021332500200420ustar00rootroot00000000000000#include "tests.h" int main() { for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { bool ycbcr = sys >= PL_COLOR_SYSTEM_BT_601 && sys <= PL_COLOR_SYSTEM_YCGCO; REQUIRE(ycbcr == pl_color_system_is_ycbcr_like(sys)); } for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { bool hdr = trc >= PL_COLOR_TRC_PQ && trc <= PL_COLOR_TRC_S_LOG2; REQUIRE(hdr == pl_color_transfer_is_hdr(trc)); REQUIRE(pl_color_transfer_nominal_peak(trc) >= 1.0); } float pq_peak = pl_color_transfer_nominal_peak(PL_COLOR_TRC_PQ); REQUIRE(feq(PL_COLOR_REF_WHITE * pq_peak, 10000)); struct pl_color_repr tv_repr = { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_TV, }; struct pl_color_repr pc_repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_PC, }; // Ensure this is a no-op for bits == bits for (int bits = 1; bits <= 16; bits++) { tv_repr.bits.color_depth = tv_repr.bits.sample_depth = bits; pc_repr.bits.color_depth = pc_repr.bits.sample_depth = bits; REQUIRE(feq(pl_color_repr_normalize(&tv_repr), 1.0)); REQUIRE(feq(pl_color_repr_normalize(&pc_repr), 1.0)); } tv_repr.bits.color_depth = 8; tv_repr.bits.sample_depth = 10; float tv8to10 = pl_color_repr_normalize(&tv_repr); tv_repr.bits.color_depth = 8; tv_repr.bits.sample_depth = 12; float tv8to12 = pl_color_repr_normalize(&tv_repr); // Simulate the effect of GPU texture sampling on UNORM texture REQUIRE(feq(tv8to10 * 16 /1023., 64/1023.)); // black REQUIRE(feq(tv8to10 * 235/1023., 940/1023.)); // nominal white REQUIRE(feq(tv8to10 * 128/1023., 512/1023.)); // achromatic REQUIRE(feq(tv8to10 * 240/1023., 960/1023.)); // nominal chroma peak REQUIRE(feq(tv8to12 * 16 /4095., 256 /4095.)); // black REQUIRE(feq(tv8to12 * 235/4095., 3760/4095.)); // nominal white REQUIRE(feq(tv8to12 * 128/4095., 2048/4095.)); // achromatic REQUIRE(feq(tv8to12 * 240/4095., 3840/4095.)); // nominal chroma peak // Ensure lavc's xyz12 is handled correctly struct pl_color_repr xyz12 = { .sys = PL_COLOR_SYSTEM_XYZ, .levels = PL_COLOR_LEVELS_UNKNOWN, .bits = { .sample_depth = 16, .color_depth = 12, .bit_shift = 4, }, }; float xyz = pl_color_repr_normalize(&xyz12); REQUIRE(feq(xyz * (4095 << 4), 65535)); // Assume we uploaded a 10-bit source directly (unshifted) as a 16-bit // texture. 
This texture multiplication factor should make it behave as if // it was uploaded as a 10-bit texture instead. pc_repr.bits.color_depth = 10; pc_repr.bits.sample_depth = 16; float pc10to16 = pl_color_repr_normalize(&pc_repr); REQUIRE(feq(pc10to16 * 1000/65535., 1000/1023.)); const struct pl_raw_primaries *bt709, *bt2020; bt709 = pl_raw_primaries_get(PL_COLOR_PRIM_BT_709); bt2020 = pl_raw_primaries_get(PL_COLOR_PRIM_BT_2020); struct pl_matrix3x3 rgb2xyz, rgb2xyz_; rgb2xyz = rgb2xyz_ = pl_get_rgb2xyz_matrix(bt709); pl_matrix3x3_invert(&rgb2xyz_); pl_matrix3x3_invert(&rgb2xyz_); // Make sure the double-inversion round trips for (int y = 0; y < 3; y++) { for (int x = 0; x < 3; x++) REQUIRE(feq(rgb2xyz.m[y][x], rgb2xyz_.m[y][x])); } // Make sure mapping the spectral RGB colors (i.e. the matrix rows) matches // our original primaries float Y = rgb2xyz.m[1][0]; REQUIRE(feq(rgb2xyz.m[0][0], pl_cie_X(bt709->red) * Y)); REQUIRE(feq(rgb2xyz.m[2][0], pl_cie_Z(bt709->red) * Y)); Y = rgb2xyz.m[1][1]; REQUIRE(feq(rgb2xyz.m[0][1], pl_cie_X(bt709->green) * Y)); REQUIRE(feq(rgb2xyz.m[2][1], pl_cie_Z(bt709->green) * Y)); Y = rgb2xyz.m[1][2]; REQUIRE(feq(rgb2xyz.m[0][2], pl_cie_X(bt709->blue) * Y)); REQUIRE(feq(rgb2xyz.m[2][2], pl_cie_Z(bt709->blue) * Y)); // Make sure the gamut mapping round-trips struct pl_matrix3x3 bt709_bt2020, bt2020_bt709; bt709_bt2020 = pl_get_color_mapping_matrix(bt709, bt2020, PL_INTENT_RELATIVE_COLORIMETRIC); bt2020_bt709 = pl_get_color_mapping_matrix(bt2020, bt709, PL_INTENT_RELATIVE_COLORIMETRIC); for (int n = 0; n < 10; n++) { float vec[3] = { RANDOM, RANDOM, RANDOM }; float dst[3] = { vec[0], vec[1], vec[2] }; pl_matrix3x3_apply(&bt709_bt2020, dst); pl_matrix3x3_apply(&bt2020_bt709, dst); for (int i = 0; i < 3; i++) REQUIRE(feq(dst[i], vec[i])); } // Ensure the decoding matrix round-trips to white/black for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { if (!pl_color_system_is_linear(sys)) continue; printf("testing color system %u\n", (unsigned) sys); struct pl_color_repr repr = { .levels = PL_COLOR_LEVELS_TV, .sys = sys, }; struct pl_transform3x3 yuv2rgb = pl_color_repr_decode(&repr, NULL); static const float white_ycbcr[3] = { 235/255., 128/255., 128/255. }; static const float black_ycbcr[3] = { 16/255., 128/255., 128/255. }; static const float white_other[3] = { 235/255., 235/255., 235/255. }; static const float black_other[3] = { 16/255., 16/255., 16/255. }; float white[3], black[3]; for (int i = 0; i < 3; i++) { if (pl_color_system_is_ycbcr_like(sys)) { white[i] = white_ycbcr[i]; black[i] = black_ycbcr[i]; } else { white[i] = white_other[i]; black[i] = black_other[i]; } } pl_transform3x3_apply(&yuv2rgb, white); REQUIRE(feq(white[0], 1.0)); REQUIRE(feq(white[1], 1.0)); REQUIRE(feq(white[2], 1.0)); pl_transform3x3_apply(&yuv2rgb, black); REQUIRE(feq(black[0], 0.0)); REQUIRE(feq(black[1], 0.0)); REQUIRE(feq(black[2], 0.0)); } // Simulate a typical 10-bit YCbCr -> 16 bit texture conversion tv_repr.bits.color_depth = 10; tv_repr.bits.sample_depth = 16; struct pl_transform3x3 yuv2rgb; yuv2rgb = pl_color_repr_decode(&tv_repr, NULL); float test[3] = { 575/65535., 336/65535., 640/65535. 
}; pl_transform3x3_apply(&yuv2rgb, test); REQUIRE(feq(test[0], 0.808305)); REQUIRE(feq(test[1], 0.553254)); REQUIRE(feq(test[2], 0.218841)); } libplacebo-0.4.0/src/tests/context.c000066400000000000000000000001611324021332500173610ustar00rootroot00000000000000#include "tests.h" int main() { struct pl_context *ctx = pl_test_context(); pl_context_destroy(&ctx); } libplacebo-0.4.0/src/tests/dither.c000066400000000000000000000011451324021332500171570ustar00rootroot00000000000000#include "tests.h" #define SHIFT 4 #define SIZE (1 << SHIFT) float data[SIZE][SIZE]; int main() { printf("Ordered dither matrix:\n"); pl_generate_bayer_matrix(&data[0][0], SIZE); for (int y = 0; y < SIZE; y++) { for (int x = 0; x < SIZE; x++) printf(" %3d", (int)(data[y][x] * SIZE * SIZE)); printf("\n"); } printf("Blue noise dither matrix:\n"); pl_generate_blue_noise(&data[0][0], SHIFT); for (int y = 0; y < SIZE; y++) { for (int x = 0; x < SIZE; x++) printf(" %3d", (int)(data[y][x] * SIZE * SIZE)); printf("\n"); } } libplacebo-0.4.0/src/tests/filters.c000066400000000000000000000027641324021332500173600ustar00rootroot00000000000000#include "tests.h" int main() { struct pl_context *ctx = pl_test_context(); for (const struct pl_named_filter_config *conf = pl_named_filters; conf->filter; conf++) { struct pl_filter_params params = { .config = *conf->filter, .lut_entries = 128, }; printf("== filter '%s' ==\n", conf->name); const struct pl_filter *flt = pl_filter_generate(ctx, ¶ms); REQUIRE(flt); if (params.config.polar) { printf("lut:"); for (int i = 0; i < params.lut_entries; i++) printf(" %f", flt->weights[i]); printf("\n"); // Ensure the kernel seems sanely scaled REQUIRE(feq(flt->weights[0], 1.0)); REQUIRE(feq(flt->weights[params.lut_entries - 1], 0.0)); } else { // Ensure the weights for each row add up to unity for (int i = 0; i < params.lut_entries; i++) { printf("row %d:", i); float sum = 0.0; REQUIRE(flt->row_size); REQUIRE(flt->row_stride >= flt->row_size); for (int n = 0; n < flt->row_size; n++) { float w = flt->weights[i * flt->row_stride + n]; printf(" %f", w); sum += w; } printf(" = %f\n", sum); REQUIRE(feq(sum, 1.0)); } } pl_filter_free(&flt); } pl_context_destroy(&ctx); } libplacebo-0.4.0/src/tests/gpu_tests.h000066400000000000000000000315421324021332500177260ustar00rootroot00000000000000#include "tests.h" #include "shaders.h" static void pl_test_roundtrip(const struct pl_gpu *gpu, const struct pl_tex *tex, float *src, float *dst) { REQUIRE(tex); int texels = tex->params.w; texels *= tex->params.h ? tex->params.h : 1; texels *= tex->params.d ? 
tex->params.d : 1; for (int i = 0; i < texels; i++) src[i] = RANDOM; REQUIRE(pl_tex_upload(gpu, &(struct pl_tex_transfer_params){ .tex = tex, .ptr = src, })); REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params){ .tex = tex, .ptr = dst, })); for (int i = 0; i < texels; i++) REQUIRE(src[i] == dst[i]); } static void pl_texture_tests(const struct pl_gpu *gpu) { const struct pl_fmt *fmt; fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, 0); if (!fmt) return; struct pl_tex_params params = { .format = fmt, .host_writable = true, .host_readable = true, }; static float src[16*16*16] = {0}; static float dst[16*16*16] = {0}; const struct pl_tex *tex = NULL; if (gpu->limits.max_tex_1d_dim >= 16) { params.w = 16; tex = pl_tex_create(gpu, ¶ms); pl_test_roundtrip(gpu, tex, src, dst); pl_tex_destroy(gpu, &tex); } if (gpu->limits.max_tex_2d_dim >= 16) { params.w = params.h = 16; tex = pl_tex_create(gpu, ¶ms); pl_test_roundtrip(gpu, tex, src, dst); pl_tex_destroy(gpu, &tex); } if (gpu->limits.max_tex_3d_dim >= 16) { params.w = params.h = params.d = 16; tex = pl_tex_create(gpu, ¶ms); pl_test_roundtrip(gpu, tex, src, dst); pl_tex_destroy(gpu, &tex); } } static void pl_shader_tests(const struct pl_gpu *gpu) { if (gpu->glsl.version < 410) return; const char *vert_shader = "#version 410 \n" "layout(location=0) in vec2 vertex_pos; \n" "layout(location=1) in vec3 vertex_color; \n" "layout(location=0) out vec3 frag_color; \n" "void main() { \n" " gl_Position = vec4(vertex_pos, 0, 1); \n" " frag_color = vertex_color; \n" "}"; const char *frag_shader = "#version 410 \n" "layout(location=0) in vec3 frag_color; \n" "layout(location=0) out vec4 out_color; \n" "void main() { \n" " out_color = vec4(frag_color, 1.0); \n" "}"; const struct pl_fmt *fbo_fmt; enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE | PL_FMT_CAP_LINEAR; fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, caps); if (!fbo_fmt) return; #define FBO_W 16 #define FBO_H 16 const struct pl_tex *fbo; fbo = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fbo_fmt, .w = FBO_W, .h = FBO_H, .renderable = true, .storable = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE), .host_readable = true, .blit_dst = true, }); REQUIRE(fbo); pl_tex_clear(gpu, fbo, (float[4]){0}); const struct pl_fmt *vert_fmt; vert_fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3); REQUIRE(vert_fmt); struct vertex { float pos[2]; float color[3]; } vertices[] = { {{-1.0, -1.0}, {0, 0, 0}}, {{ 1.0, -1.0}, {1, 0, 0}}, {{-1.0, 1.0}, {0, 1, 0}}, {{ 1.0, 1.0}, {1, 1, 0}}, }; const struct pl_pass *pass; pass = pl_pass_create(gpu, &(struct pl_pass_params) { .type = PL_PASS_RASTER, .target_dummy = *fbo, .vertex_shader = vert_shader, .glsl_shader = frag_shader, .vertex_type = PL_PRIM_TRIANGLE_STRIP, .vertex_stride = sizeof(struct vertex), .num_vertex_attribs = 2, .vertex_attribs = (struct pl_vertex_attrib[]) {{ .name = "vertex_pos", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), .location = 0, .offset = offsetof(struct vertex, pos), }, { .name = "vertex_color", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3), .location = 1, .offset = offsetof(struct vertex, color), }}, }); REQUIRE(pass); REQUIRE(pass->params.cached_program_len); pl_pass_run(gpu, &(struct pl_pass_run_params) { .pass = pass, .target = fbo, .vertex_data = vertices, .vertex_count = sizeof(vertices) / sizeof(struct vertex), }); static float data[FBO_H * FBO_W * 4] = {0}; pl_tex_download(gpu, &(struct pl_tex_transfer_params) { .tex = fbo, .ptr = data, }); for (int y = 0; y < FBO_H; y++) { for (int x = 0; x < FBO_W; x++) 
{ float *color = &data[(y * FBO_W + x) * 4]; printf("color: %f %f %f %f\n", color[0], color[1], color[2], color[3]); REQUIRE(feq(color[0], (x + 0.5) / FBO_W)); REQUIRE(feq(color[1], (y + 0.5) / FBO_H)); REQUIRE(feq(color[2], 0.0)); REQUIRE(feq(color[3], 1.0)); } } pl_pass_destroy(gpu, &pass); pl_tex_clear(gpu, fbo, (float[4]){0}); // Test the use of pl_dispatch struct pl_dispatch *dp = pl_dispatch_create(gpu->ctx, gpu); const struct pl_tex *src; src = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fbo_fmt, .w = FBO_W, .h = FBO_H, .sampleable = true, .sample_mode = PL_TEX_SAMPLE_LINEAR, .initial_data = data, }); // Repeat this a few times to test the caching for (int i = 0; i < 10; i++) { printf("iteration %d\n", i); pl_dispatch_reset_frame(dp); struct pl_shader *sh = pl_dispatch_begin(dp); // For testing, force the use of CS if possible if (gpu->caps & PL_GPU_CAP_COMPUTE) { sh->is_compute = true; sh->res.compute_group_size[0] = 8; sh->res.compute_group_size[1] = 8; } pl_shader_deband(sh, src, &(struct pl_deband_params) { .iterations = 0, .grain = 0.0, }); pl_shader_linearize(sh, PL_COLOR_TRC_GAMMA22); REQUIRE(pl_dispatch_finish(dp, &sh, fbo, NULL, NULL)); } pl_tex_download(gpu, &(struct pl_tex_transfer_params) { .tex = fbo, .ptr = data, }); for (int y = 0; y < FBO_H; y++) { for (int x = 0; x < FBO_W; x++) { float *color = &data[(y * FBO_W + x) * 4]; printf("color: %f %f %f %f\n", color[0], color[1], color[2], color[3]); REQUIRE(feq(color[0], pow((x + 0.5) / FBO_W, 2.2))); REQUIRE(feq(color[1], pow((y + 0.5) / FBO_H, 2.2))); REQUIRE(feq(color[2], 0.0)); REQUIRE(feq(color[3], 1.0)); } } pl_dispatch_destroy(&dp); pl_tex_destroy(gpu, &src); pl_tex_destroy(gpu, &fbo); } static void pl_scaler_tests(const struct pl_gpu *gpu) { const struct pl_fmt *src_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_LINEAR); const struct pl_fmt *fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_RENDERABLE); if (!src_fmt || !fbo_fmt) return; float *fbo_data = NULL; struct pl_shader_obj *lut = NULL; static float data_5x5[5][5] = { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 1, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, }; const struct pl_tex *dot5x5 = pl_tex_create(gpu, &(struct pl_tex_params) { .w = 5, .h = 5, .format = src_fmt, .sampleable = true, .sample_mode = PL_TEX_SAMPLE_LINEAR, .address_mode = PL_TEX_ADDRESS_CLAMP, .initial_data = &data_5x5[0][0], }); const struct pl_tex *fbo = pl_tex_create(gpu, &(struct pl_tex_params) { .w = 100, .h = 100, .format = fbo_fmt, .renderable = true, .storable = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE), .host_readable = true, }); struct pl_dispatch *dp = pl_dispatch_create(gpu->ctx, gpu); if (!dot5x5 || !fbo || !dp) goto error; struct pl_shader *sh = pl_dispatch_begin(dp); REQUIRE(pl_shader_sample_polar(sh, &(struct pl_sample_src) { .tex = dot5x5, .new_w = fbo->params.w, .new_h = fbo->params.h, }, &(struct pl_sample_filter_params) { .filter = pl_filter_ewa_lanczos, .lut = &lut, .no_compute = !fbo->params.storable, } )); REQUIRE(pl_dispatch_finish(dp, &sh, fbo, NULL, NULL)); fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float)); REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { .tex = fbo, .ptr = fbo_data, })); int max = 255; printf("P2\n%d %d\n%d\n", fbo->params.w, fbo->params.h, max); for (int y = 0; y < fbo->params.h; y++) { for (int x = 0; x < fbo->params.w; x++) { float v = fbo_data[y * fbo->params.h + x]; printf("%d ", (int) round(fmin(fmax(v, 0.0), 1.0) * max)); } printf("\n"); } error: free(fbo_data); 
pl_shader_obj_destroy(&lut); pl_dispatch_destroy(&dp); pl_tex_destroy(gpu, &dot5x5); pl_tex_destroy(gpu, &fbo); } static void pl_render_tests(const struct pl_gpu *gpu) { const struct pl_fmt *fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE); if (!fbo_fmt) return; float *fbo_data = NULL; static float data_5x5[5][5] = { { 0.0, 0.0, 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 0.0, 0.0 }, { 1.0, 0.0, 0.5, 0.0, 0.0 }, { 0.0, 0.0, 0.0, 1.0, 0.0 }, { 0.0, 0.3, 0.0, 0.0, 0.0 }, }; const int width = 5, height = 5; struct pl_plane img5x5 = {0}; bool ok = pl_upload_plane(gpu, &img5x5, &(struct pl_plane_data) { .type = PL_FMT_FLOAT, .width = width, .height = height, .component_size = { 8 * sizeof(float) }, .component_map = { 0 }, .pixel_stride = sizeof(float), .pixels = &data_5x5, }); if (!ok) { pl_tex_destroy(gpu, &img5x5.texture); return; } const struct pl_tex *fbo = pl_tex_create(gpu, &(struct pl_tex_params) { .w = 40, .h = 40, .format = fbo_fmt, .renderable = true, .blit_dst = true, .storable = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE), .host_readable = true, }); struct pl_renderer *rr = pl_renderer_create(gpu->ctx, gpu); if (!fbo || !rr) goto error; struct pl_image image = { .signature = 0, .num_planes = 1, .planes = { img5x5 }, .repr = { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_PC, }, .color = pl_color_space_hdr10, // test tone-mapping .width = width, .height = height, .src_rect = {-1.0, 0.0, width - 1.0, height}, }; struct pl_render_target target = { .fbo = fbo, .dst_rect = {2, 2, fbo->params.w - 2, fbo->params.h - 2}, .repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_PC, }, .color = pl_color_space_srgb, }; pl_tex_clear(gpu, fbo, (float[4]){0}); REQUIRE(pl_render_image(rr, &image, &target, NULL)); fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float[4])); REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { .tex = fbo, .ptr = fbo_data, })); int max = 255; printf("P3\n%d %d\n%d\n", fbo->params.w, fbo->params.h, max); for (int y = 0; y < fbo->params.h; y++) { for (int x = 0; x < fbo->params.w; x++) { float *v = &fbo_data[(y * fbo->params.h + x) * 4]; for (int i = 0; i < 3; i++) printf("%d ", (int) round(fmin(fmax(v[i], 0.0), 1.0) * max)); } printf("\n"); } error: free(fbo_data); pl_renderer_destroy(&rr); pl_tex_destroy(gpu, &img5x5.texture); pl_tex_destroy(gpu, &fbo); } static void gpu_tests(const struct pl_gpu *gpu) { pl_texture_tests(gpu); pl_shader_tests(gpu); pl_scaler_tests(gpu); pl_render_tests(gpu); } libplacebo-0.4.0/src/tests/tests.h000066400000000000000000000026541324021332500170550ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include "common.h" #include #include #include #include static inline struct pl_context *pl_test_context() { setbuf(stdout, NULL); setbuf(stderr, NULL); return pl_context_create(PL_API_VER, &(struct pl_context_params) { .log_cb = isatty(fileno(stdout)) ? pl_log_color : pl_log_simple, .log_level = PL_LOG_ALL, }); } static inline void require(bool b, const char *msg) { if (!b) { fprintf(stderr, "%s", msg); exit(1); } } static inline bool feq(float a, float b) { return fabs(a - b) < 1e-6 * fmax(1.0, fabs(a)); } #define REQUIRE(cond) require((cond), #cond) #define RANDOM (rand() / (float) RAND_MAX) #define SKIP 77 libplacebo-0.4.0/src/tests/utils.c000066400000000000000000000015451324021332500170440ustar00rootroot00000000000000#include "tests.h" int main() { struct pl_plane_data data = {0}; pl_plane_data_from_mask(&data, (uint64_t[4]){ 0xFF, 0xFF00, 0xFF0000 }); for (int i = 0; i < 3; i++) { REQUIRE(data.component_size[i] == 8); REQUIRE(data.component_pad[i] == 0); REQUIRE(data.component_map[i] == i); } pl_plane_data_from_mask(&data, (uint64_t[4]){ 0xFFFF0000, 0xFFFF }); for (int i = 0; i < 2; i++) { REQUIRE(data.component_size[i] == 16); REQUIRE(data.component_pad[i] == 0); REQUIRE(data.component_map[i] == 1 - i); } pl_plane_data_from_mask(&data, (uint64_t[4]){ 0x03FF, 0x03FF0000 }); REQUIRE(data.component_pad[0] == 0); REQUIRE(data.component_pad[1] == 6); for (int i = 0; i < 2; i++) { REQUIRE(data.component_size[i] == 10); REQUIRE(data.component_map[i] == i); } } libplacebo-0.4.0/src/tests/vulkan.c000066400000000000000000000011401324021332500171730ustar00rootroot00000000000000#include "gpu_tests.h" int main() { struct pl_context *ctx = pl_test_context(); struct pl_vk_inst_params iparams = pl_vk_inst_default_params; iparams.debug = true; const struct pl_vk_inst *vkinst = pl_vk_inst_create(ctx, &iparams); if (!vkinst) return SKIP; struct pl_vulkan_params params = pl_vulkan_default_params; params.instance = vkinst->instance; const struct pl_vulkan *vk = pl_vulkan_create(ctx, ¶ms); if (!vk) return SKIP; gpu_tests(vk->gpu); pl_vulkan_destroy(&vk); pl_vk_inst_destroy(&vkinst); pl_context_destroy(&ctx); } libplacebo-0.4.0/src/utils/000077500000000000000000000000001324021332500155315ustar00rootroot00000000000000libplacebo-0.4.0/src/utils/upload.c000066400000000000000000000124611324021332500171650ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "context.h" #include "common.h" #include "gpu.h" struct comp { int order; // e.g. 
0, 1, 2, 3 for RGBA int size; // size in bits int shift; // bit-shift / offset in bits }; static int compare_comp(const void *pa, const void *pb) { const struct comp *a = pa, *b = pb; // Move all of the components with a size of 0 to the end, so they can // be ignored outright if (a->size && !b->size) return -1; if (b->size && !a->size) return 1; // Otherwise, just compare based on the shift return PL_CMP(a->shift, b->shift); } void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4]) { struct comp comps[4] = { {0}, {1}, {2}, {3} }; for (int i = 0; i < PL_ARRAY_SIZE(comps); i++) { comps[i].size = __builtin_popcount(mask[i]); comps[i].shift = PL_MAX(0, __builtin_ffsll(mask[i]) - 1); } // Sort the components by shift qsort(comps, PL_ARRAY_SIZE(comps), sizeof(struct comp), compare_comp); // Generate the resulting component size/pad/map int offset = 0; for (int i = 0; i < PL_ARRAY_SIZE(comps); i++) { if (!comps[i].size) return; assert(comps[i].shift >= offset); data->component_size[i] = comps[i].size; data->component_pad[i] = comps[i].shift - offset; data->component_map[i] = comps[i].order; offset += data->component_size[i] + data->component_pad[i]; } } const struct pl_fmt *pl_plane_find_fmt(const struct pl_gpu *gpu, int out_map[4], const struct pl_plane_data *data) { int dummy[4] = {0}; out_map = PL_DEF(out_map, dummy); // Count the number of components and initialize out_map int num = 0; for (int i = 0; i < PL_ARRAY_SIZE(data->component_size); i++) { out_map[i] = -1; if (data->component_size[i]) num = i+1; } for (int n = 0; n < gpu->num_formats; n++) { const struct pl_fmt *fmt = gpu->formats[n]; if (fmt->opaque || fmt->num_components < num) continue; if (fmt->type != data->type || fmt->texel_size != data->pixel_stride) continue; if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE)) continue; int idx = 0; // Try mapping all pl_plane_data components to texture components for (int i = 0; i < num; i++) { // If there's padding we have to map it to an unused physical // component first int pad = data->component_pad[i]; if (pad && (idx >= 4 || fmt->host_bits[idx++] != pad)) goto next_fmt; // Otherwise, try and match this component int size = data->component_size[i]; if (size && (idx >= 4 || fmt->host_bits[idx] != size)) goto next_fmt; out_map[idx++] = data->component_map[i]; } return fmt; next_fmt: ; // acts as `continue` } return NULL; } bool pl_upload_plane(const struct pl_gpu *gpu, struct pl_plane *plane, const struct pl_plane_data *data) { size_t row_stride = PL_DEF(data->row_stride, data->pixel_stride * data->width); unsigned int stride_texels = row_stride / data->pixel_stride; if (stride_texels * data->pixel_stride != row_stride) { PL_ERR(gpu, "data->row_stride must be a multiple of data->pixel_stride!"); return false; } int out_map[4]; const struct pl_fmt *fmt = pl_plane_find_fmt(gpu, out_map, data); if (!fmt) { PL_ERR(gpu, "Failed picking any compatible texture format for a plane!"); return false; // TODO: try soft-converting to a supported format using e.g zimg? } bool ok = pl_tex_recreate(gpu, &plane->texture, &(struct pl_tex_params) { .w = data->width, .h = data->height, .format = fmt, .sampleable = true, .host_writable = true, .blit_src = !!(fmt->caps & PL_FMT_CAP_BLITTABLE), .address_mode = PL_TEX_ADDRESS_CLAMP, .sample_mode = (fmt->caps & PL_FMT_CAP_LINEAR) ? 
PL_TEX_SAMPLE_LINEAR : PL_TEX_SAMPLE_NEAREST, }); if (!ok) { PL_ERR(gpu, "Failed initializing plane texture!"); return false; } for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) { plane->component_mapping[i] = out_map[i]; if (out_map[i] >= 0) plane->components = i+1; } return pl_tex_upload(gpu, &(struct pl_tex_transfer_params) { .tex = plane->texture, .stride_w = stride_texels, .ptr = (void *) data->pixels, }); } libplacebo-0.4.0/src/vulkan/000077500000000000000000000000001324021332500156715ustar00rootroot00000000000000libplacebo-0.4.0/src/vulkan/command.c000066400000000000000000000320171324021332500174560ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "command.h" #include "utils.h" // returns VK_SUCCESS (completed), VK_TIMEOUT (not yet completed) or an error static VkResult vk_cmd_poll(struct vk_ctx *vk, struct vk_cmd *cmd, uint64_t timeout) { return vkWaitForFences(vk->dev, 1, &cmd->fence, false, timeout); } static void vk_cmd_reset(struct vk_ctx *vk, struct vk_cmd *cmd) { for (int i = 0; i < cmd->num_callbacks; i++) { struct vk_callback *cb = &cmd->callbacks[i]; cb->run(cb->priv, cb->arg); } cmd->num_callbacks = 0; cmd->num_deps = 0; cmd->num_sigs = 0; // also make sure to reset vk->last_cmd in case this was the last command if (vk->last_cmd == cmd) vk->last_cmd = NULL; } static void vk_cmd_destroy(struct vk_ctx *vk, struct vk_cmd *cmd) { if (!cmd) return; vk_cmd_poll(vk, cmd, UINT64_MAX); vk_cmd_reset(vk, cmd); vkDestroyFence(vk->dev, cmd->fence, VK_ALLOC); vkFreeCommandBuffers(vk->dev, cmd->pool->pool, 1, &cmd->buf); talloc_free(cmd); } static struct vk_cmd *vk_cmd_create(struct vk_ctx *vk, struct vk_cmdpool *pool) { struct vk_cmd *cmd = talloc_zero(NULL, struct vk_cmd); cmd->pool = pool; VkCommandBufferAllocateInfo ainfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = pool->pool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = 1, }; VK(vkAllocateCommandBuffers(vk->dev, &ainfo, &cmd->buf)); VkFenceCreateInfo finfo = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .flags = VK_FENCE_CREATE_SIGNALED_BIT, }; VK(vkCreateFence(vk->dev, &finfo, VK_ALLOC, &cmd->fence)); return cmd; error: vk_cmd_destroy(vk, cmd); return NULL; } void vk_dev_callback(struct vk_ctx *vk, vk_cb callback, const void *priv, const void *arg) { if (vk->last_cmd) { vk_cmd_callback(vk->last_cmd, callback, priv, arg); } else { // The device was already idle, so we can just immediately call it callback((void *) priv, (void *) arg); } } void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, const void *priv, const void *arg) { TARRAY_APPEND(cmd, cmd->callbacks, cmd->num_callbacks, (struct vk_callback) { .run = callback, .priv = (void *) priv, .arg = (void *) arg, }); } void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage) { int idx = cmd->num_deps++; TARRAY_GROW(cmd, cmd->deps, idx); TARRAY_GROW(cmd, 
cmd->depstages, idx); cmd->deps[idx] = dep; cmd->depstages[idx] = stage; } void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig) { TARRAY_APPEND(cmd, cmd->sigs, cmd->num_sigs, sig); } struct vk_signal { VkSemaphore semaphore; VkEvent event; enum vk_wait_type type; // last signal type VkQueue source; // last signal source }; struct vk_signal *vk_cmd_signal(struct vk_ctx *vk, struct vk_cmd *cmd, VkPipelineStageFlags stage) { struct vk_signal *sig = NULL; if (TARRAY_POP(vk->signals, vk->num_signals, &sig)) goto done; // no available signal => initialize a new one sig = talloc_zero(NULL, struct vk_signal); static const VkSemaphoreCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, }; VK(vkCreateSemaphore(vk->dev, &sinfo, VK_ALLOC, &sig->semaphore)); static const VkEventCreateInfo einfo = { .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, }; VK(vkCreateEvent(vk->dev, &einfo, VK_ALLOC, &sig->event)); done: // Signal both the semaphore, and the event if possible. (We will only // end up using one or the other) vk_cmd_sig(cmd, sig->semaphore); sig->type = VK_WAIT_NONE; sig->source = cmd->queue; VkQueueFlags req = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; if (cmd->pool->props.queueFlags & req) { vkCmdSetEvent(cmd->buf, sig->event, stage); sig->type = VK_WAIT_EVENT; } return sig; error: vk_signal_destroy(vk, &sig); return NULL; } static bool unsignal_cmd(struct vk_cmd *cmd, VkSemaphore sem) { for (int n = 0; n < cmd->num_sigs; n++) { if (cmd->sigs[n] == sem) { TARRAY_REMOVE_AT(cmd->sigs, cmd->num_sigs, n); return true; } } return false; } // Attempts to remove a queued signal operation. Returns true if sucessful, // i.e. the signal could be removed before it ever got fired. static bool unsignal(struct vk_ctx *vk, struct vk_cmd *cmd, VkSemaphore sem) { if (unsignal_cmd(cmd, sem)) return true; // Attempt to remove it from any queued commands for (int i = 0; i < vk->num_cmds_queued; i++) { if (unsignal_cmd(vk->cmds_queued[i], sem)) return true; } return false; } static void release_signal(struct vk_ctx *vk, struct vk_signal *sig) { // The semaphore never needs to be recreated, because it's either // unsignaled while still queued, or unsignaled as a result of a device // wait. But the event *may* need to be reset, so just always reset it. vkResetEvent(vk->dev, sig->event); sig->source = NULL; TARRAY_APPEND(vk, vk->signals, vk->num_signals, sig); } enum vk_wait_type vk_cmd_wait(struct vk_ctx *vk, struct vk_cmd *cmd, struct vk_signal **sigptr, VkPipelineStageFlags stage, VkEvent *out_event) { struct vk_signal *sig = *sigptr; if (!sig) return VK_WAIT_NONE; if (sig->source == cmd->queue && unsignal(vk, cmd, sig->semaphore)) { // If we can remove the semaphore signal operation from the history and // pretend it never happened, then we get to use the more efficient // synchronization primitives. However, this requires that we're still // in the same VkQueue. if (sig->type == VK_WAIT_EVENT && out_event) { *out_event = sig->event; } else { sig->type = VK_WAIT_BARRIER; } } else { // Otherwise, we use the semaphore. (This also unsignals it as a result // of the command execution) vk_cmd_dep(cmd, sig->semaphore, stage); sig->type = VK_WAIT_NONE; } // In either case, once the command completes, we can release the signal // resource back to the pool. 
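// (Note: release_signal is registered through the generic vk_cb interface, so
// it runs as release_signal(vk, sig) from vk_cmd_reset(), typically once
// vk_poll_commands() observes this command's fence as signalled.)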
vk_cmd_callback(cmd, (vk_cb) release_signal, vk, sig); *sigptr = NULL; return sig->type; } void vk_signal_destroy(struct vk_ctx *vk, struct vk_signal **sig) { if (!*sig) return; vkDestroySemaphore(vk->dev, (*sig)->semaphore, VK_ALLOC); vkDestroyEvent(vk->dev, (*sig)->event, VK_ALLOC); talloc_free(*sig); *sig = NULL; } struct vk_cmdpool *vk_cmdpool_create(struct vk_ctx *vk, VkDeviceQueueCreateInfo qinfo, VkQueueFamilyProperties props) { struct vk_cmdpool *pool = talloc_ptrtype(NULL, pool); *pool = (struct vk_cmdpool) { .props = props, .qf = qinfo.queueFamilyIndex, .queues = talloc_array(pool, VkQueue, qinfo.queueCount), .num_queues = qinfo.queueCount, }; for (int n = 0; n < pool->num_queues; n++) vkGetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]); VkCommandPoolCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, .queueFamilyIndex = pool->qf, }; VK(vkCreateCommandPool(vk->dev, &cinfo, VK_ALLOC, &pool->pool)); return pool; error: vk_cmdpool_destroy(vk, pool); return NULL; } void vk_cmdpool_destroy(struct vk_ctx *vk, struct vk_cmdpool *pool) { if (!pool) return; for (int i = 0; i < pool->num_cmds; i++) vk_cmd_destroy(vk, pool->cmds[i]); vkDestroyCommandPool(vk->dev, pool->pool, VK_ALLOC); talloc_free(pool); } struct vk_cmd *vk_cmd_begin(struct vk_ctx *vk, struct vk_cmdpool *pool) { // garbage collect the cmdpool first, to increase the chances of getting // an already-available command buffer vk_poll_commands(vk, 0); struct vk_cmd *cmd = NULL; if (TARRAY_POP(pool->cmds, pool->num_cmds, &cmd)) goto done; // No free command buffers => allocate another one cmd = vk_cmd_create(vk, pool); if (!cmd) goto error; done: ; VkCommandBufferBeginInfo binfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; VK(vkBeginCommandBuffer(cmd->buf, &binfo)); cmd->queue = pool->queues[pool->idx_queues]; return cmd; error: // Something has to be seriously messed up if we get to this point vk_cmd_destroy(vk, cmd); return NULL; } void vk_cmd_queue(struct vk_ctx *vk, struct vk_cmd *cmd) { struct vk_cmdpool *pool = cmd->pool; VK(vkEndCommandBuffer(cmd->buf)); VK(vkResetFences(vk->dev, 1, &cmd->fence)); TARRAY_APPEND(vk, vk->cmds_queued, vk->num_cmds_queued, cmd); vk->last_cmd = cmd; if (vk->num_cmds_queued >= PL_VK_MAX_QUEUED_CMDS) { PL_WARN(vk, "Exhausted the queued command limit.. forcing a flush now. " "Consider using pl_flush after submitting a batch of work?"); vk_flush_commands(vk); } return; error: vk_cmd_reset(vk, cmd); TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd); } bool vk_poll_commands(struct vk_ctx *vk, uint64_t timeout) { bool ret = false; if (timeout && vk->num_cmds_queued) vk_flush_commands(vk); while (vk->num_cmds_pending > 0) { struct vk_cmd *cmd = vk->cmds_pending[0]; struct vk_cmdpool *pool = cmd->pool; VkResult res = vk_cmd_poll(vk, cmd, timeout); if (res == VK_TIMEOUT) break; PL_TRACE(vk, "VkFence signalled: %p", (void *) cmd->fence); vk_cmd_reset(vk, cmd); TARRAY_REMOVE_AT(vk->cmds_pending, vk->num_cmds_pending, 0); TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd); ret = true; // If we've successfully spent some time waiting for at least one // command, disable the timeout. This has the dual purpose of both // making sure we don't over-wait due to repeat timeout applicaiton, // but also makes sure we don't block on future commands if we've // already spend time waiting for one. 
timeout = 0; } return ret; } bool vk_flush_commands(struct vk_ctx *vk) { bool ret = true; for (int i = 0; i < vk->num_cmds_queued; i++) { struct vk_cmd *cmd = vk->cmds_queued[i]; struct vk_cmdpool *pool = cmd->pool; VkSubmitInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &cmd->buf, .waitSemaphoreCount = cmd->num_deps, .pWaitSemaphores = cmd->deps, .pWaitDstStageMask = cmd->depstages, .signalSemaphoreCount = cmd->num_sigs, .pSignalSemaphores = cmd->sigs, }; if (pl_msg_test(vk->ctx, PL_LOG_TRACE)) { PL_TRACE(vk, "Submitting command on queue %p (QF %d):", (void *)cmd->queue, pool->qf); for (int n = 0; n < cmd->num_deps; n++) PL_TRACE(vk, " waits on semaphore %p", (void *) cmd->deps[n]); for (int n = 0; n < cmd->num_sigs; n++) PL_TRACE(vk, " signals semaphore %p", (void *) cmd->sigs[n]); PL_TRACE(vk, " signals fence %p", (void *) cmd->fence); } VK(vkQueueSubmit(cmd->queue, 1, &sinfo, cmd->fence)); TARRAY_APPEND(vk, vk->cmds_pending, vk->num_cmds_pending, cmd); continue; error: vk_cmd_reset(vk, cmd); TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd); ret = false; } vk->num_cmds_queued = 0; // Rotate the queues to ensure good parallelism across frames for (int i = 0; i < vk->num_pools; i++) { struct vk_cmdpool *pool = vk->pools[i]; pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues; } // Wait until we've processed some of the now pending commands while (vk->num_cmds_pending > PL_VK_MAX_PENDING_CMDS) vk_poll_commands(vk, 1000000000); // 1s return ret; } void vk_wait_idle(struct vk_ctx *vk) { while (vk_poll_commands(vk, UINT64_MAX)) ; } libplacebo-0.4.0/src/vulkan/command.h000066400000000000000000000154501324021332500174650ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // Since lots of vulkan operations need to be done lazily once the affected // resources are no longer in use, provide an abstraction for tracking these. // In practice, these are only checked and run when submitting new commands, so // the actual execution may be delayed by a frame. typedef void (*vk_cb)(void *p, void *arg); struct vk_callback { vk_cb run; void *priv; void *arg; }; // Associate a callback with the completion of all currently pending commands. // This will essentially run once the device is completely idle. void vk_dev_callback(struct vk_ctx *vk, vk_cb callback, const void *priv, const void *arg); // Helper wrapper around command buffers that also track dependencies, // callbacks and synchronization primitives struct vk_cmd { struct vk_cmdpool *pool; // pool it was allocated from VkQueue queue; // the submission queue (for recording/pending) VkCommandBuffer buf; // the command buffer itself VkFence fence; // the fence guards cmd buffer reuse // The semaphores represent dependencies that need to complete before // this command can be executed. 
These are *not* owned by the vk_cmd VkSemaphore *deps; VkPipelineStageFlags *depstages; int num_deps; // The signals represent semaphores that fire once the command finishes // executing. These are also not owned by the vk_cmd VkSemaphore *sigs; int num_sigs; // Since VkFences are useless, we have to manually track "callbacks" // to fire once the VkFence completes. These are used for multiple purposes, // ranging from garbage collection (resource deallocation) to fencing. struct vk_callback *callbacks; int num_callbacks; }; // Associate a callback with the completion of the current command. This // bool will be set to `true` once the command completes, or shortly thereafter. void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, const void *priv, const void *arg); // Associate a raw dependency for the current command. This semaphore must // signal by the corresponding stage before the command may execute. void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage); // Associate a raw signal with the current command. This semaphore will signal // after the command completes. void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig); enum vk_wait_type { VK_WAIT_NONE, // no synchronization needed VK_WAIT_BARRIER, // synchronization via pipeline barriers VK_WAIT_EVENT, // synchronization via events }; // Signal abstraction: represents an abstract synchronization mechanism. // Internally, this may either resolve as a semaphore or an event depending // on whether the appropriate conditions are met. struct vk_signal; // Generates a signal after the execution of all previous commands matching the // given the pipeline stage. The signal is owned by the caller, and must be // consumed eith vk_cmd_wait or released with vk_signal_cancel in order to // free the resources. struct vk_signal *vk_cmd_signal(struct vk_ctx *vk, struct vk_cmd *cmd, VkPipelineStageFlags stage); // Consumes a previously generated signal. This signal must fire by the // indicated stage before the command can run. This function takes over // ownership of the signal (and the signal will be released/reused // automatically) // // The return type indicates what the caller needs to do: // VK_SIGNAL_NONE: no further handling needed, caller can use TOP_OF_PIPE // VK_SIGNAL_BARRIER: caller must use pipeline barrier from last stage // VK_SIGNAL_EVENT: caller must use VkEvent from last stage // (never returned if out_event is NULL) enum vk_wait_type vk_cmd_wait(struct vk_ctx *vk, struct vk_cmd *cmd, struct vk_signal **sigptr, VkPipelineStageFlags stage, VkEvent *out_event); // Destroys a currently pending signal, for example if the resource is no // longer relevant. void vk_signal_destroy(struct vk_ctx *vk, struct vk_signal **sig); // Command pool / queue family hybrid abstraction struct vk_cmdpool { VkQueueFamilyProperties props; int qf; // queue family index VkCommandPool pool; VkQueue *queues; int num_queues; int idx_queues; // Command buffers associated with this queue. These are available for // re-recording struct vk_cmd **cmds; int num_cmds; }; // Set up a vk_cmdpool corresponding to a queue family. struct vk_cmdpool *vk_cmdpool_create(struct vk_ctx *vk, VkDeviceQueueCreateInfo qinfo, VkQueueFamilyProperties props); void vk_cmdpool_destroy(struct vk_ctx *vk, struct vk_cmdpool *pool); // Fetch a command buffer from a command pool and begin recording to it. // Returns NULL on failure. 
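// As a rough usage sketch (error handling omitted; `buf`, `on_done`, `priv`
// and `arg` are hypothetical caller-side names, with on_done being a vk_cb):
//
//     struct vk_cmd *cmd = vk_cmd_begin(vk, vk->pool_graphics);
//     vkCmdFillBuffer(cmd->buf, buf, 0, VK_WHOLE_SIZE, 0x0); // record work
//     vk_cmd_callback(cmd, on_done, priv, arg); // optional completion hook
//     vk_cmd_queue(vk, cmd);
//     vk_flush_commands(vk);   // once per frame / batch of submissions
//     vk_poll_commands(vk, 0); // non-blocking; runs callbacks of finished cmds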
struct vk_cmd *vk_cmd_begin(struct vk_ctx *vk, struct vk_cmdpool *pool); // Finish recording a command buffer and queue it for execution. This function // takes over ownership of *cmd, i.e. the caller should not touch it again. void vk_cmd_queue(struct vk_ctx *vk, struct vk_cmd *cmd); // Block until some commands complete executing. This is the only function that // actually processes the callbacks. Will wait at most `timeout` nanoseconds // for the completion of any command. The timeout may also be passed as 0, in // which case this function will not block, but only poll for completed // commands. Returns whether any forward progress was made. bool vk_poll_commands(struct vk_ctx *vk, uint64_t timeout); // Flush all currently queued commands. Call this once per frame, after // submitting all of the command buffers for that frame. Calling this more // often than that is possible but bad for performance. // Returns whether successful. Failed commands will be implicitly dropped. bool vk_flush_commands(struct vk_ctx *vk); // Wait until all commands are complete, i.e. the device is idle. This is // basically equivalent to calling `vk_poll_commands` with a timeout of // UINT64_MAX until it returns `false`. void vk_wait_idle(struct vk_ctx *vk); libplacebo-0.4.0/src/vulkan/common.h000066400000000000000000000050271324021332500173360ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "../common.h" #include "../context.h" // Vulkan allows the optional use of a custom allocator. We don't need one but // mark this parameter with a better name in case we ever decide to change this // in the future. (And to make the code more readable) #define VK_ALLOC NULL // Needed to load some extension-specific functions (for whatever reason) #define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \ vkGetInstanceProcAddr(inst, #name); // Hard-coded limit on the number of pending commands, to avoid OOM loops #define PL_VK_MAX_QUEUED_CMDS 64 #define PL_VK_MAX_PENDING_CMDS 64 // Shared struct used to hold vulkan context information struct vk_ctx { const struct pl_vk_inst *internal_instance; struct pl_context *ctx; VkInstance inst; VkPhysicalDevice physd; VkPhysicalDeviceLimits limits; VkPhysicalDeviceFeatures features; VkDevice dev; struct vk_cmdpool **pools; // command pools (one per queue family) int num_pools; // Pointers into *pools struct vk_cmdpool *pool_graphics; // required struct vk_cmdpool *pool_compute; // optional struct vk_cmdpool *pool_transfer; // optional // Queued/pending commands. These are shared for the entire mpvk_ctx to // ensure submission and callbacks are FIFO struct vk_cmd **cmds_queued; // recorded but not yet submitted struct vk_cmd **cmds_pending; // submitted but not completed int num_cmds_queued; int num_cmds_pending; // A dynamic reference to the most recently submitted command that has not // yet completed. 
Used to implement vk_dev_callback. Gets cleared when // the command completes. struct vk_cmd *last_cmd; // Common pool of signals, to avoid having to re-create these objects often struct vk_signal **signals; int num_signals; }; libplacebo-0.4.0/src/vulkan/context.c000066400000000000000000000412741324021332500175310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include "command.h" #include "utils.h" #include "gpu.h" const struct pl_vk_inst_params pl_vk_inst_default_params = {0}; void pl_vk_inst_destroy(const struct pl_vk_inst **inst_ptr) { const struct pl_vk_inst *inst = *inst_ptr; if (!inst) return; VkDebugReportCallbackEXT debug = (VkDebugReportCallbackEXT) inst->priv; if (debug) { VK_LOAD_PFN(inst->instance, vkDestroyDebugReportCallbackEXT) pfn_vkDestroyDebugReportCallbackEXT(inst->instance, debug, VK_ALLOC); } vkDestroyInstance(inst->instance, VK_ALLOC); TA_FREEP((void **) inst_ptr); } static VkBool32 vk_dbg_callback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objType, uint64_t obj, size_t loc, int32_t msgCode, const char *layer, const char *msg, void *priv) { struct pl_context *ctx = priv; enum pl_log_level lev = PL_LOG_INFO; switch (flags) { case VK_DEBUG_REPORT_ERROR_BIT_EXT: lev = PL_LOG_ERR; break; case VK_DEBUG_REPORT_WARNING_BIT_EXT: lev = PL_LOG_WARN; break; case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: lev = PL_LOG_WARN; break; case VK_DEBUG_REPORT_DEBUG_BIT_EXT: lev = PL_LOG_DEBUG; break; case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: lev = PL_LOG_TRACE; break; }; pl_msg(ctx, lev, "vk [%s] %d: %s (obj 0x%llx (%s), loc 0x%zx)", layer, (int) msgCode, msg, (unsigned long long) obj, vk_obj_str(objType), loc); // The return value of this function determines whether the call will // be explicitly aborted (to prevent GPU errors) or not. In this case, // we generally want this to be on for the errors. 
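// Concretely: validation errors return true here and abort the offending
// call, while warnings, performance warnings, info and debug reports return
// false and are only logged.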
return (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT); } const struct pl_vk_inst *pl_vk_inst_create(struct pl_context *ctx, const struct pl_vk_inst_params *params) { void *tmp = talloc_new(NULL); params = PL_DEF(params, &pl_vk_inst_default_params); const char **exts = NULL; int num_exts = 0; VkInstanceCreateInfo info = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, }; // Add extra user extensions for (int i = 0; i < params->num_extensions; i++) TARRAY_APPEND(tmp, exts, num_exts, params->extensions[i]); if (params->debug) { pl_info(ctx, "Enabling vulkan debug layers"); // Enables the LunarG standard validation layer, which // is a meta-layer that loads lots of other validators static const char *layers[] = { "VK_LAYER_LUNARG_standard_validation", }; info.ppEnabledLayerNames = layers; info.enabledLayerCount = PL_ARRAY_SIZE(layers); // Enable support for debug callbacks, so we get useful messages TARRAY_APPEND(tmp, exts, num_exts, VK_EXT_DEBUG_REPORT_EXTENSION_NAME); } info.ppEnabledExtensionNames = exts; info.enabledExtensionCount = num_exts; VkInstance inst = NULL; VkResult res = vkCreateInstance(&info, VK_ALLOC, &inst); if (res != VK_SUCCESS) { pl_fatal(ctx, "Failed creating instance: %s", vk_res_str(res)); goto error; } VkDebugReportCallbackEXT debug = NULL; if (params->debug) { // Set up a debug callback to catch validation messages VkDebugReportCallbackCreateInfoEXT dinfo = { .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, .flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT | VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_DEBUG_BIT_EXT, .pfnCallback = vk_dbg_callback, .pUserData = ctx, }; // Since this is not part of the core spec, we need to load it. This // can't fail because we've already successfully created an instance // with this extension enabled. 
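// (VK_LOAD_PFN, defined in vulkan/common.h, declares a local
// pfn_vkCreateDebugReportCallbackEXT by resolving the entry point through
// vkGetInstanceProcAddr.)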
VK_LOAD_PFN(inst, vkCreateDebugReportCallbackEXT) pfn_vkCreateDebugReportCallbackEXT(inst, &dinfo, VK_ALLOC, &debug); } talloc_free(tmp); return talloc_struct(NULL, struct pl_vk_inst, { .instance = inst, .priv = (uint64_t) debug, }); error: pl_fatal(ctx, "Failed initializing vulkan instance"); pl_vk_inst_destroy((const struct pl_vk_inst **) &inst); talloc_free(tmp); return NULL; } const struct pl_vulkan_params pl_vulkan_default_params = { .async_transfer = true, .async_compute = true, .queue_count = 1, // enabling multiple queues often decreases perf }; void pl_vulkan_destroy(const struct pl_vulkan **pl_vk) { if (!*pl_vk) return; pl_gpu_destroy((*pl_vk)->gpu); struct vk_ctx *vk = (*pl_vk)->priv; if (vk->dev) { PL_DEBUG(vk, "Flushing remaining commands..."); vk_wait_idle(vk); pl_assert(vk->num_cmds_queued == 0); pl_assert(vk->num_cmds_pending == 0); for (int i = 0; i < vk->num_pools; i++) vk_cmdpool_destroy(vk, vk->pools[i]); for (int i = 0; i < vk->num_signals; i++) vk_signal_destroy(vk, &vk->signals[i]); vkDestroyDevice(vk->dev, VK_ALLOC); } pl_vk_inst_destroy(&vk->internal_instance); TA_FREEP((void **) pl_vk); } static bool supports_surf(struct vk_ctx *vk, VkPhysicalDevice physd, VkSurfaceKHR surf) { uint32_t qfnum; vkGetPhysicalDeviceQueueFamilyProperties(physd, &qfnum, NULL); for (int i = 0; i < qfnum; i++) { VkBool32 sup; VK(vkGetPhysicalDeviceSurfaceSupportKHR(physd, i, surf, &sup)); if (sup) return true; } error: return false; } static bool find_physical_device(struct vk_ctx *vk, const struct pl_vulkan_params *params) { PL_INFO(vk, "Probing for vulkan devices:"); bool ret = false; VkPhysicalDevice *devices = NULL; uint32_t num = 0; VK(vkEnumeratePhysicalDevices(vk->inst, &num, NULL)); devices = talloc_array(NULL, VkPhysicalDevice, num); VK(vkEnumeratePhysicalDevices(vk->inst, &num, devices)); static const struct { const char *name; int priority; } types[] = { [VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU] = {"discrete", 5}, [VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU] = {"integrated", 4}, [VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU] = {"virtual", 3}, [VK_PHYSICAL_DEVICE_TYPE_CPU] = {"software", 2}, [VK_PHYSICAL_DEVICE_TYPE_OTHER] = {"other", 1}, [VK_PHYSICAL_DEVICE_TYPE_END_RANGE+1] = {0}, }; int best = 0; for (int i = 0; i < num; i++) { VkPhysicalDeviceProperties props; vkGetPhysicalDeviceProperties(devices[i], &props); VkPhysicalDeviceType t = props.deviceType; PL_INFO(vk, " GPU %d: %s (%s)", i, props.deviceName, types[t].name); if (params->surface && !supports_surf(vk, devices[i], params->surface)) { PL_DEBUG(vk, " -> excluding due to lack of surface support"); continue; } if (params->device_name) { if (strcmp(params->device_name, props.deviceName) == 0) { vk->physd = devices[i]; best = 10; // high number... } else { PL_DEBUG(vk, " -> excluding due to name mismatch"); continue; } } if (!params->allow_software && t == VK_PHYSICAL_DEVICE_TYPE_CPU) { PL_DEBUG(vk, " -> excluding due to params->allow_software"); continue; } if (types[t].priority > best) { vk->physd = devices[i]; best = types[t].priority; } } if (!vk->physd) { PL_FATAL(vk, "Found no suitable device, giving up."); goto error; } ret = true; error: talloc_free(devices); return ret; } // Find the most specialized queue supported a combination of flags. In cases // where there are multiple queue families at the same specialization level, // this finds the one with the most queues. Returns -1 if no queue was found. 
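// For example, given qfs[0].queueFlags = GRAPHICS|COMPUTE|TRANSFER (0x7) and
// qfs[1].queueFlags = TRANSFER (0x4), find_qf(qfs, 2, VK_QUEUE_TRANSFER_BIT)
// returns 1: both families match the requested flags, but the masked value
// 0x4 compares lower than 0x7, so qfs[1] counts as the more specialized
// family. (Illustrative values only; real devices report their own layout.)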
static int find_qf(VkQueueFamilyProperties *qfs, int qfnum, VkQueueFlags flags) { int idx = -1; for (int i = 0; i < qfnum; i++) { if (!(qfs[i].queueFlags & flags)) continue; // QF is more specialized. Since we don't care about other bits like // SPARSE_BIT, mask the ones we're interestew in const VkQueueFlags mask = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_COMPUTE_BIT; if (idx < 0 || (qfs[i].queueFlags & mask) < (qfs[idx].queueFlags & mask)) idx = i; // QF has more queues (at the same specialization level) if (qfs[i].queueFlags == qfs[idx].queueFlags && qfs[i].queueCount > qfs[idx].queueCount) idx = i; } return idx; } static void add_qinfo(void *tactx, VkDeviceQueueCreateInfo **qinfos, int *num_qinfos, VkQueueFamilyProperties *qfs, int idx, int qcount) { if (idx < 0) return; // Check to see if we've already added this queue family for (int i = 0; i < *num_qinfos; i++) { if ((*qinfos)[i].queueFamilyIndex == idx) return; } if (!qcount) qcount = qfs[idx].queueCount; float *priorities = talloc_zero_array(tactx, float, qcount); VkDeviceQueueCreateInfo qinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .queueFamilyIndex = idx, .queueCount = PL_MIN(qcount, qfs[idx].queueCount), .pQueuePriorities = priorities, }; TARRAY_APPEND(tactx, *qinfos, *num_qinfos, qinfo); } static bool device_init(struct vk_ctx *vk, const struct pl_vulkan_params *params) { pl_assert(vk->physd); void *tmp = talloc_new(NULL); // Enumerate the queue families and find suitable families for each task int qfnum; vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL); VkQueueFamilyProperties *qfs = talloc_array(tmp, VkQueueFamilyProperties, qfnum); vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs); PL_INFO(vk, "Queue families supported by device:"); for (int i = 0; i < qfnum; i++) { PL_INFO(vk, " QF %d: flags 0x%x num %d", i, (unsigned) qfs[i].queueFlags, (int) qfs[i].queueCount); } int idx_gfx = -1, idx_comp = -1, idx_tf = -1; idx_gfx = find_qf(qfs, qfnum, VK_QUEUE_GRAPHICS_BIT); if (params->async_compute) idx_comp = find_qf(qfs, qfnum, VK_QUEUE_COMPUTE_BIT); if (params->async_transfer) idx_tf = find_qf(qfs, qfnum, VK_QUEUE_TRANSFER_BIT); // Vulkan requires at least one GRAPHICS queue, so if this fails something // is horribly wrong. pl_assert(idx_gfx >= 0); PL_INFO(vk, "Using graphics queue (QF %d)", idx_gfx); // If needed, ensure we can actually present to the surface using this queue if (params->surface) { VkBool32 sup; VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx_gfx, params->surface, &sup)); if (!sup) { PL_FATAL(vk, "Queue family does not support surface presentation!"); goto error; } } if (idx_tf >= 0 && idx_tf != idx_gfx) PL_INFO(vk, "Using async transfer (QF %d)", idx_tf); if (idx_comp >= 0 && idx_comp != idx_gfx) PL_INFO(vk, "Using async compute (QF %d)", idx_comp); // Fall back to supporting compute shaders via the graphics pool for // devices which support compute shaders but not async compute. 
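// (In other words: if no separate compute family was chosen, compute work
// shares the graphics family, but only if that family itself advertises
// VK_QUEUE_COMPUTE_BIT.)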
if (idx_comp < 0 && qfs[idx_gfx].queueFlags & VK_QUEUE_COMPUTE_BIT) idx_comp = idx_gfx; // Now that we know which QFs we want, we can create the logical device VkDeviceQueueCreateInfo *qinfos = NULL; int num_qinfos = 0; add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_gfx, params->queue_count); add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_comp, params->queue_count); add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_tf, params->queue_count); const char **exts = NULL; int num_exts = 0; if (params->surface) TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME); // Add extra user extensions for (int i = 0; i < params->num_extensions; i++) TARRAY_APPEND(tmp, exts, num_exts, params->extensions[i]); // Enable all features that we might need (whitelisted) vkGetPhysicalDeviceFeatures(vk->physd, &vk->features); #define FEATURE(name) .name = vk->features.name vk->features = (VkPhysicalDeviceFeatures) { FEATURE(shaderImageGatherExtended), FEATURE(shaderStorageImageExtendedFormats), }; #undef FEATURE VkDeviceCreateInfo dinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pQueueCreateInfos = qinfos, .queueCreateInfoCount = num_qinfos, .ppEnabledExtensionNames = exts, .enabledExtensionCount = num_exts, .pEnabledFeatures = &vk->features, }; PL_INFO(vk, "Creating vulkan device%s", num_exts ? " with extensions:" : ""); for (int i = 0; i < num_exts; i++) PL_INFO(vk, " %s", exts[i]); VK(vkCreateDevice(vk->physd, &dinfo, VK_ALLOC, &vk->dev)); // Create the command pools and memory allocator for (int i = 0; i < num_qinfos; i++) { int qf = qinfos[i].queueFamilyIndex; struct vk_cmdpool *pool = vk_cmdpool_create(vk, qinfos[i], qfs[qf]); if (!pool) goto error; TARRAY_APPEND(vk, vk->pools, vk->num_pools, pool); // Update the pool_* pointers based on the corresponding index if (qf == idx_gfx) vk->pool_graphics = pool; if (qf == idx_comp) vk->pool_compute = pool; if (qf == idx_tf) vk->pool_transfer = pool; } talloc_free(tmp); return true; error: PL_FATAL(vk, "Failed creating logical device!"); talloc_free(tmp); return false; } const struct pl_vulkan *pl_vulkan_create(struct pl_context *ctx, const struct pl_vulkan_params *params) { params = PL_DEF(params, &pl_vulkan_default_params); struct pl_vulkan *pl_vk = talloc_zero(NULL, struct pl_vulkan); struct vk_ctx *vk = pl_vk->priv = talloc_zero(pl_vk, struct vk_ctx); vk->ctx = ctx; vk->inst = params->instance; if (!vk->inst) { pl_assert(!params->surface); pl_assert(!params->device); PL_DEBUG(vk, "No VkInstance provided, creating one..."); vk->internal_instance = pl_vk_inst_create(ctx, NULL); if (!vk->internal_instance) goto error; vk->inst = vk->internal_instance->instance; } // Choose the physical device if (params->device) { PL_DEBUG(vk, "Using specified VkPhysicalDevice"); vk->physd = params->device; } else if (!find_physical_device(vk, params)) { goto error; } VkPhysicalDeviceProperties prop; vkGetPhysicalDeviceProperties(vk->physd, &prop); vk->limits = prop.limits; PL_INFO(vk, "Vulkan device properties:"); PL_INFO(vk, " Device Name: %s", prop.deviceName); PL_INFO(vk, " Device ID: %x:%x", (unsigned) prop.vendorID, (unsigned) prop.deviceID); PL_INFO(vk, " Driver version: %d", (int) prop.driverVersion); PL_INFO(vk, " API version: %d.%d.%d", (int) VK_VERSION_MAJOR(prop.apiVersion), (int) VK_VERSION_MINOR(prop.apiVersion), (int) VK_VERSION_PATCH(prop.apiVersion)); // Finally, initialize the logical device and the rest of the vk_ctx if (!device_init(vk, params)) goto error; pl_vk->gpu = pl_gpu_create_vk(vk); if (!pl_vk->gpu) goto error; // Expose the resulting 
vulkan objects pl_vk->instance = vk->inst; pl_vk->phys_device = vk->physd; pl_vk->device = vk->dev; return pl_vk; error: PL_FATAL(vk, "Failed initializing vulkan device"); pl_vulkan_destroy((const struct pl_vulkan **) &pl_vk); return NULL; } libplacebo-0.4.0/src/vulkan/formats.c000066400000000000000000000215551324021332500175200ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "formats.h" #define FMT(_name, num, size, ftype, bits, idx) \ (struct pl_fmt) { \ .name = _name, \ .type = PL_FMT_##ftype, \ .num_components = num, \ .component_depth = bits, \ .opaque = false, \ .texel_size = size, \ .host_bits = bits, \ .sample_order = idx, \ } #define IDX(...) {__VA_ARGS__} #define BITS(...) {__VA_ARGS__} #define REGFMT(name, num, bits, type) \ FMT(name, num, (num) * (bits) / 8, type, \ BITS(bits, bits, bits, bits), \ IDX(0, 1, 2, 3)) const struct vk_format vk_formats[] = { // Regular, byte-aligned integer formats {VK_FORMAT_R8_UNORM, REGFMT("r8", 1, 8, UNORM)}, {VK_FORMAT_R8G8_UNORM, REGFMT("rg8", 2, 8, UNORM)}, {VK_FORMAT_R8G8B8_UNORM, REGFMT("rgb8", 3, 8, UNORM), .emufmt = VK_FORMAT_R8G8B8A8_UNORM}, {VK_FORMAT_R8G8B8A8_UNORM, REGFMT("rgba8", 4, 8, UNORM)}, {VK_FORMAT_R16_UNORM, REGFMT("r16", 1, 16, UNORM)}, {VK_FORMAT_R16G16_UNORM, REGFMT("rg16", 2, 16, UNORM)}, {VK_FORMAT_R16G16B16_UNORM, REGFMT("rgb16", 3, 16, UNORM), .emufmt = VK_FORMAT_R16G16B16A16_UNORM}, {VK_FORMAT_R16G16B16A16_UNORM, REGFMT("rgba16", 4, 16, UNORM)}, {VK_FORMAT_R8_SNORM, REGFMT("r8s", 1, 8, SNORM)}, {VK_FORMAT_R8G8_SNORM, REGFMT("rg8s", 2, 8, SNORM)}, {VK_FORMAT_R8G8B8_SNORM, REGFMT("rgb8s", 3, 8, SNORM)}, {VK_FORMAT_R8G8B8A8_SNORM, REGFMT("rgba8s", 4, 8, SNORM)}, {VK_FORMAT_R16_SNORM, REGFMT("r16s", 1, 16, SNORM)}, {VK_FORMAT_R16G16_SNORM, REGFMT("rg16s", 2, 16, SNORM)}, {VK_FORMAT_R16G16B16_SNORM, REGFMT("rgb16s", 3, 16, SNORM)}, {VK_FORMAT_R16G16B16A16_SNORM, REGFMT("rgba16s", 4, 16, SNORM)}, // Float formats (native formats: hf = half float, df = double float) {VK_FORMAT_R16_SFLOAT, REGFMT("r16hf", 1, 16, FLOAT)}, {VK_FORMAT_R16G16_SFLOAT, REGFMT("rg16hf", 2, 16, FLOAT)}, {VK_FORMAT_R16G16B16_SFLOAT, REGFMT("rgb16hf", 3, 16, FLOAT)}, {VK_FORMAT_R16G16B16A16_SFLOAT, REGFMT("rgba16hf", 4, 16, FLOAT)}, {VK_FORMAT_R32_SFLOAT, REGFMT("r32f", 1, 32, FLOAT)}, {VK_FORMAT_R32G32_SFLOAT, REGFMT("rg32f", 2, 32, FLOAT)}, {VK_FORMAT_R32G32B32_SFLOAT, REGFMT("rgb32f", 3, 32, FLOAT)}, {VK_FORMAT_R32G32B32A32_SFLOAT, REGFMT("rgba32f", 4, 32, FLOAT)}, {VK_FORMAT_R64_SFLOAT, REGFMT("r64df", 1, 64, FLOAT)}, {VK_FORMAT_R64G64_SFLOAT, REGFMT("rg64df", 2, 64, FLOAT)}, {VK_FORMAT_R64G64B64_SFLOAT, REGFMT("rgb64df", 3, 64, FLOAT)}, {VK_FORMAT_R64G64B64A64_SFLOAT, REGFMT("rgba64df", 4, 64, FLOAT)}, // Integer-sampled formats {VK_FORMAT_R8_UINT, REGFMT("r8u", 1, 8, UINT)}, {VK_FORMAT_R8G8_UINT, REGFMT("rg8u", 2, 8, UINT)}, {VK_FORMAT_R8G8B8_UINT, REGFMT("rgb8u", 3, 8, UINT)}, {VK_FORMAT_R8G8B8A8_UINT, 
REGFMT("rgba8u", 4, 8, UINT)}, {VK_FORMAT_R16_UINT, REGFMT("r16u", 1, 16, UINT)}, {VK_FORMAT_R16G16_UINT, REGFMT("rg16u", 2, 16, UINT)}, {VK_FORMAT_R16G16B16_UINT, REGFMT("rgb16u", 3, 16, UINT)}, {VK_FORMAT_R16G16B16A16_UINT, REGFMT("rgba16u", 4, 16, UINT)}, {VK_FORMAT_R32_UINT, REGFMT("r32u", 1, 32, UINT)}, {VK_FORMAT_R32G32_UINT, REGFMT("rg32u", 2, 32, UINT)}, {VK_FORMAT_R32G32B32_UINT, REGFMT("rgb32u", 3, 32, UINT)}, {VK_FORMAT_R32G32B32A32_UINT, REGFMT("rgba32u", 4, 32, UINT)}, {VK_FORMAT_R64_UINT, REGFMT("r64u", 1, 64, UINT)}, {VK_FORMAT_R64G64_UINT, REGFMT("rg64u", 2, 64, UINT)}, {VK_FORMAT_R64G64B64_UINT, REGFMT("rgb64u", 3, 64, UINT)}, {VK_FORMAT_R64G64B64A64_UINT, REGFMT("rgba64u", 4, 64, UINT)}, {VK_FORMAT_R8_SINT, REGFMT("r8i", 1, 8, SINT)}, {VK_FORMAT_R8G8_SINT, REGFMT("rg8i", 2, 8, SINT)}, {VK_FORMAT_R8G8B8_SINT, REGFMT("rgb8i", 3, 8, SINT)}, {VK_FORMAT_R8G8B8A8_SINT, REGFMT("rgba8i", 4, 8, SINT)}, {VK_FORMAT_R16_SINT, REGFMT("r16i", 1, 16, SINT)}, {VK_FORMAT_R16G16_SINT, REGFMT("rg16i", 2, 16, SINT)}, {VK_FORMAT_R16G16B16_SINT, REGFMT("rgb16i", 3, 16, SINT)}, {VK_FORMAT_R16G16B16A16_SINT, REGFMT("rgba16i", 4, 16, SINT)}, {VK_FORMAT_R32_SINT, REGFMT("r32i", 1, 32, SINT)}, {VK_FORMAT_R32G32_SINT, REGFMT("rg32i", 2, 32, SINT)}, {VK_FORMAT_R32G32B32_SINT, REGFMT("rgb32i", 3, 32, SINT)}, {VK_FORMAT_R32G32B32A32_SINT, REGFMT("rgba32i", 4, 32, SINT)}, {VK_FORMAT_R64_SINT, REGFMT("r64i", 1, 64, SINT)}, {VK_FORMAT_R64G64_SINT, REGFMT("rg64i", 2, 64, SINT)}, {VK_FORMAT_R64G64B64_SINT, REGFMT("rgb64i", 3, 64, SINT)}, {VK_FORMAT_R64G64B64A64_SINT, REGFMT("rgba64i", 4, 64, SINT)}, // "Swapped" component order formats {VK_FORMAT_B8G8R8_UNORM, FMT("bgr8", 3, 3, UNORM, BITS(8, 8, 8), IDX(2, 1, 0))}, {VK_FORMAT_B8G8R8A8_UNORM, FMT("bgra8", 4, 4, UNORM, BITS(8, 8, 8, 8), IDX(2, 1, 0, 3))}, {VK_FORMAT_B4G4R4A4_UNORM_PACK16, FMT("bgra4", 4, 2, UNORM, BITS(4, 4, 4, 4), IDX(2, 1, 0, 3))}, {VK_FORMAT_B5G6R5_UNORM_PACK16, FMT("bgr565", 3, 2, UNORM, BITS(5, 6, 5), IDX(2, 1, 0))}, {VK_FORMAT_B5G5R5A1_UNORM_PACK16, FMT("bgr5a1", 4, 2, UNORM, BITS(5, 5, 5, 1), IDX(2, 1, 0, 3))}, {VK_FORMAT_A1R5G5B5_UNORM_PACK16, FMT("a1rgb5", 4, 2, UNORM, BITS(1, 5, 5, 5), IDX(3, 0, 1, 2))}, {VK_FORMAT_A2R10G10B10_UNORM_PACK32, FMT("a2rgb10", 4, 4, UNORM, BITS(2, 10, 10, 10), IDX(3, 0, 1, 2))}, {VK_FORMAT_A2B10G10R10_UNORM_PACK32, FMT("a2bgr10", 4, 4, UNORM, BITS(2, 10, 10, 10), IDX(3, 2, 1, 0))}, {VK_FORMAT_A8B8G8R8_UNORM_PACK32, FMT("abgr8", 4, 4, UNORM, BITS(8, 8, 8, 8), IDX(3, 2, 1, 0))}, {VK_FORMAT_A2R10G10B10_SNORM_PACK32, FMT("a2rgb10s", 4, 4, SNORM, BITS(2, 10, 10, 10), IDX(3, 0, 1, 2))}, {VK_FORMAT_A2B10G10R10_SNORM_PACK32, FMT("a2bgr10s", 4, 4, SNORM, BITS(2, 10, 10, 10), IDX(3, 2, 1, 0))}, {VK_FORMAT_A8B8G8R8_SNORM_PACK32, FMT("abgr8s", 4, 4, SNORM, BITS(8, 8, 8, 8), IDX(3, 2, 1, 0))}, {VK_FORMAT_B8G8R8_UINT, FMT("bgr8u", 3, 3, UINT, BITS(8, 8, 8), IDX(2, 1, 0))}, {VK_FORMAT_B8G8R8A8_UINT, FMT("bgra8u", 4, 4, UINT, BITS(8, 8, 8, 8), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2R10G10B10_UINT_PACK32, FMT("a2rgb10u", 4, 4, UINT, BITS(2, 10, 10, 10), IDX(3, 0, 1, 2))}, {VK_FORMAT_A2B10G10R10_UINT_PACK32, FMT("a2bgr10u", 4, 4, UINT, BITS(2, 10, 10, 10), IDX(3, 2, 1, 0))}, {VK_FORMAT_A8B8G8R8_UINT_PACK32, FMT("abgr8u", 4, 4, UINT, BITS(8, 8, 8, 8), IDX(3, 2, 1, 0))}, {VK_FORMAT_B8G8R8_SINT, FMT("bgr8i", 3, 3, SINT, BITS(8, 8, 8), IDX(2, 1, 0))}, {VK_FORMAT_B8G8R8A8_SINT, FMT("bgra8i", 4, 4, SINT, BITS(8, 8, 8, 8), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2R10G10B10_SINT_PACK32, FMT("a2rgb10i", 4, 4, SINT, BITS(2, 10, 10, 10), IDX(3, 0, 
1, 2))}, {VK_FORMAT_A2B10G10R10_SINT_PACK32, FMT("a2bgr10i", 4, 4, SINT, BITS(2, 10, 10, 10), IDX(3, 2, 1, 0))}, {VK_FORMAT_A8B8G8R8_SINT_PACK32, FMT("abgr8i", 4, 4, SINT, BITS(8, 8, 8, 8), IDX(3, 2, 1, 0))}, // Special, packed integer formats (low bit depth) {VK_FORMAT_R4G4_UNORM_PACK8, REGFMT("rg4", 2, 4, UNORM)}, {VK_FORMAT_R4G4B4A4_UNORM_PACK16, REGFMT("rgba4", 4, 4, UNORM)}, {VK_FORMAT_R5G6B5_UNORM_PACK16, FMT("rgb565", 3, 2, UNORM, BITS(5, 6, 5), IDX(0, 1, 2))}, {VK_FORMAT_R5G5B5A1_UNORM_PACK16, FMT("rgb5a1", 4, 2, UNORM, BITS(5, 5, 5, 1), IDX(0, 1, 2, 3))}, {0} }; #undef BITS #undef IDX #undef REGFMT #undef FMT libplacebo-0.4.0/src/vulkan/formats.h000066400000000000000000000017771324021332500175310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "gpu.h" #include "common.h" struct vk_format { VkFormat ifmt; // internal vulkan format enum struct pl_fmt fmt; // pl_fmt template (features will be auto-detected) VkFormat emufmt; // alternate format for texture emulation }; extern const struct vk_format vk_formats[]; libplacebo-0.4.0/src/vulkan/gpu.c000066400000000000000000002244571324021332500166460ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "command.h" #include "formats.h" #include "malloc.h" #include "spirv.h" static struct pl_gpu_fns pl_fns_vk; enum queue_type { GRAPHICS, COMPUTE, TRANSFER, }; // For gpu.priv struct pl_vk { struct vk_ctx *vk; struct vk_malloc *alloc; struct spirv_compiler *spirv; // This is a pl_dispatch used (on ourselves!) for the purposes of // dispatching compute shaders for performing various emulation tasks // (e.g. partial clears, blits or emulated texture transfers). // Warning: Care must be taken to avoid recursive calls. struct pl_dispatch *dp; // The "currently recording" command. This will be queued and replaced by // a new command every time we need to "switch" between queue families. 
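    // Rough usage sketch (illustrative only, using the helpers defined
    // further below in this file):
    //
    //     struct vk_cmd *cmd = vk_require_cmd(gpu, TRANSFER);
    //     // ... record transfer commands into cmd->buf ...
    //     vk_submit(gpu); // flushes p->cmd via vk_cmd_queue()
    //
    // vk_require_cmd() keeps reusing this command as long as the requested
    // queue type maps to the same vk_cmdpool; switching pools (or calling
    // vk_submit) queues it and starts a fresh one.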
struct vk_cmd *cmd; }; struct vk_ctx *pl_vk_get(const struct pl_gpu *gpu) { if (gpu->impl != &pl_fns_vk) return NULL; struct pl_vk *p = gpu->priv; return p->vk; } static void vk_submit(const struct pl_gpu *gpu) { struct pl_vk *p = gpu->priv; struct vk_ctx *vk = pl_vk_get(gpu); if (p->cmd) { vk_cmd_queue(vk, p->cmd); p->cmd = NULL; } } // Returns a command buffer, or NULL on error static struct vk_cmd *vk_require_cmd(const struct pl_gpu *gpu, enum queue_type type) { struct pl_vk *p = gpu->priv; struct vk_ctx *vk = pl_vk_get(gpu); struct vk_cmdpool *pool; switch (type) { case GRAPHICS: pool = vk->pool_graphics; break; case COMPUTE: pool = vk->pool_compute; break; // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec) case TRANSFER: pool = vk->pool_transfer; if (!pool) pool = vk->pool_compute; if (!pool) pool = vk->pool_graphics; break; default: abort(); } pl_assert(pool); if (p->cmd && p->cmd->pool == pool) return p->cmd; vk_submit(gpu); p->cmd = vk_cmd_begin(vk, pool); return p->cmd; } #define MAKE_LAZY_DESTRUCTOR(fun, argtype) \ static void fun##_lazy(const struct pl_gpu *gpu, const argtype *arg) { \ struct pl_vk *p = gpu->priv; \ struct vk_ctx *vk = pl_vk_get(gpu); \ if (p->cmd) { \ vk_cmd_callback(p->cmd, (vk_cb) fun, gpu, (void *) arg); \ } else { \ vk_dev_callback(vk, (vk_cb) fun, gpu, (void *) arg); \ } \ } static void vk_destroy_ra(const struct pl_gpu *gpu) { struct pl_vk *p = gpu->priv; struct vk_ctx *vk = pl_vk_get(gpu); pl_dispatch_destroy(&p->dp); vk_submit(gpu); vk_wait_idle(vk); vk_malloc_destroy(&p->alloc); spirv_compiler_destroy(&p->spirv); talloc_free((void *) gpu); } static void vk_setup_formats(struct pl_gpu *gpu) { struct vk_ctx *vk = pl_vk_get(gpu); for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->ifmt; vk_fmt++) { VkFormatProperties prop; vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->ifmt, &prop); struct pl_fmt *fmt = talloc_ptrtype(gpu, fmt); *fmt = vk_fmt->fmt; fmt->priv = vk_fmt; // For sanity, clear the superfluous fields for (int i = fmt->num_components; i < 4; i++) { fmt->component_depth[i] = 0; fmt->sample_order[i] = 0; fmt->host_bits[i] = 0; } struct { VkFormatFeatureFlags flags; enum pl_fmt_caps caps; } bufbits[] = { {VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT, PL_FMT_CAP_VERTEX}, {VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT, PL_FMT_CAP_TEXEL_UNIFORM}, {VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT, PL_FMT_CAP_TEXEL_STORAGE}, }; for (int i = 0; i < PL_ARRAY_SIZE(bufbits); i++) { if ((prop.bufferFeatures & bufbits[i].flags) == bufbits[i].flags) fmt->caps |= bufbits[i].caps; } if (fmt->caps) { fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); pl_assert(fmt->glsl_type); } // For the texture capabilities, try falling back to the emulation // format if this format is wholly unsupported. 
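        // (For example, the vk_formats[] table pairs the 3-component
        // VK_FORMAT_R8G8B8_UNORM with VK_FORMAT_R8G8B8A8_UNORM as its emufmt,
        // since plain rgb8 is commonly not usable with optimal tiling.)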
if (!prop.optimalTilingFeatures && vk_fmt->emufmt) { fmt->emulated = true; vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->emufmt, &prop); } struct { VkFormatFeatureFlags flags; enum pl_fmt_caps caps; } bits[] = { {VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT, PL_FMT_CAP_BLENDABLE}, {VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT, PL_FMT_CAP_LINEAR}, {VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT, PL_FMT_CAP_SAMPLEABLE}, {VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT, PL_FMT_CAP_STORABLE}, {VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT, PL_FMT_CAP_RENDERABLE}, // We don't distinguish between the two blit modes for pl_fmt_caps {VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT, PL_FMT_CAP_BLITTABLE}, }; for (int i = 0; i < PL_ARRAY_SIZE(bits); i++) { if ((prop.optimalTilingFeatures & bits[i].flags) == bits[i].flags) fmt->caps |= bits[i].caps; } // Disable implied capabilities where the dependencies are unavailable if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE)) fmt->caps &= ~PL_FMT_CAP_LINEAR; if (!(gpu->caps & PL_GPU_CAP_COMPUTE)) fmt->caps &= ~(PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE); enum pl_fmt_caps storable = PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE; if (fmt->caps & storable) { fmt->glsl_format = pl_fmt_glsl_format(fmt); if (!fmt->glsl_format) { PL_WARN(gpu, "Storable format '%s' has no matching GLSL format " "qualifier?", fmt->name); fmt->caps &= ~storable; } } TARRAY_APPEND(gpu, gpu->formats, gpu->num_formats, fmt); } pl_gpu_sort_formats(gpu); } const struct pl_gpu *pl_gpu_create_vk(struct vk_ctx *vk) { pl_assert(vk->dev); struct pl_gpu *gpu = talloc_zero(NULL, struct pl_gpu); gpu->ctx = vk->ctx; gpu->impl = &pl_fns_vk; struct pl_vk *p = gpu->priv = talloc_zero(gpu, struct pl_vk); p->vk = vk; p->spirv = spirv_compiler_create(vk->ctx); p->alloc = vk_malloc_create(vk); if (!p->alloc || !p->spirv) goto error; gpu->glsl = p->spirv->glsl; gpu->limits = (struct pl_gpu_limits) { .max_tex_1d_dim = vk->limits.maxImageDimension1D, .max_tex_2d_dim = vk->limits.maxImageDimension2D, .max_tex_3d_dim = vk->limits.maxImageDimension3D, .max_pushc_size = vk->limits.maxPushConstantsSize, .max_xfer_size = SIZE_MAX, // no limit imposed by vulkan .max_ubo_size = vk->limits.maxUniformBufferRange, .max_ssbo_size = vk->limits.maxStorageBufferRange, .max_buffer_texels = vk->limits.maxTexelBufferElements, .min_gather_offset = vk->limits.minTexelGatherOffset, .max_gather_offset = vk->limits.maxTexelGatherOffset, .align_tex_xfer_stride = vk->limits.optimalBufferCopyRowPitchAlignment, .align_tex_xfer_offset = vk->limits.optimalBufferCopyOffsetAlignment, }; if (vk->pool_compute) { gpu->caps |= PL_GPU_CAP_COMPUTE; gpu->limits.max_shmem_size = vk->limits.maxComputeSharedMemorySize; gpu->limits.max_group_threads = vk->limits.maxComputeWorkGroupInvocations; for (int i = 0; i < 3; i++) { gpu->limits.max_group_size[i] = vk->limits.maxComputeWorkGroupSize[i]; gpu->limits.max_dispatch[i] = vk->limits.maxComputeWorkGroupCount[i]; } // If we have more compute queues than graphics queues, we probably // want to be using them. 
(This seems mostly relevant for AMD) if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues) gpu->caps |= PL_GPU_CAP_PARALLEL_COMPUTE; } if (!vk->features.shaderImageGatherExtended) { gpu->limits.min_gather_offset = 0; gpu->limits.max_gather_offset = 0; } vk_setup_formats(gpu); // Create the dispatch last, after any setup of `gpu` is done p->dp = pl_dispatch_create(vk->ctx, gpu); pl_gpu_print_info(gpu, PL_LOG_INFO); pl_gpu_print_formats(gpu, PL_LOG_DEBUG); return gpu; error: vk_destroy_ra(gpu); return NULL; } // Boilerplate wrapper around vkCreateRenderPass to ensure passes remain // compatible. The renderpass will automatically transition the image out of // initialLayout and into finalLayout. static VkResult vk_create_render_pass(VkDevice dev, const struct pl_fmt *fmt, VkAttachmentLoadOp loadOp, VkImageLayout initialLayout, VkImageLayout finalLayout, VkRenderPass *out) { const struct vk_format *vk_fmt = fmt->priv; VkRenderPassCreateInfo rinfo = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .format = fmt->emulated ? vk_fmt->emufmt : vk_fmt->ifmt, .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = loadOp, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = initialLayout, .finalLayout = finalLayout, }, .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .colorAttachmentCount = 1, .pColorAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, }, }, }; return vkCreateRenderPass(dev, &rinfo, VK_ALLOC, out); } // For pl_tex.priv struct pl_tex_vk { bool external_img; bool may_invalidate; enum queue_type transfer_queue; VkImageType type; VkImage img; struct vk_memslice mem; // for sampling VkImageView view; VkSampler sampler; // for rendering VkFramebuffer framebuffer; // for transfers struct pl_buf_pool pbo_write; struct pl_buf_pool pbo_read; // for transfer emulation using texel buffers const struct pl_fmt *texel_fmt; struct pl_buf_pool texel_write; struct pl_buf_pool texel_read; // "current" metadata, can change during the course of execution VkImageLayout current_layout; VkAccessFlags current_access; // the signal guards reuse, and can be NULL struct vk_signal *sig; VkPipelineStageFlags sig_stage; VkSemaphore *ext_deps; // external semaphore, not owned by the pl_tex int num_ext_deps; }; void pl_tex_vk_external_dep(const struct pl_gpu *gpu, const struct pl_tex *tex, VkSemaphore external_dep) { struct pl_tex_vk *tex_vk = tex->priv; TARRAY_APPEND(tex_vk, tex_vk->ext_deps, tex_vk->num_ext_deps, external_dep); } // Small helper to ease image barrier creation. 
if `discard` is set, the contents // of the image will be undefined after the barrier static void tex_barrier(const struct pl_gpu *gpu, struct vk_cmd *cmd, const struct pl_tex *tex, VkPipelineStageFlags stage, VkAccessFlags newAccess, VkImageLayout newLayout) { struct vk_ctx *vk = pl_vk_get(gpu); struct pl_tex_vk *tex_vk = tex->priv; for (int i = 0; i < tex_vk->num_ext_deps; i++) vk_cmd_dep(cmd, tex_vk->ext_deps[i], stage); tex_vk->num_ext_deps = 0; VkImageMemoryBarrier imgBarrier = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .oldLayout = tex_vk->current_layout, .newLayout = newLayout, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcAccessMask = tex_vk->current_access, .dstAccessMask = newAccess, .image = tex_vk->img, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .levelCount = 1, .layerCount = 1, }, }; if (tex_vk->may_invalidate) { tex_vk->may_invalidate = false; imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; imgBarrier.srcAccessMask = 0; } VkEvent event = NULL; enum vk_wait_type type = vk_cmd_wait(vk, cmd, &tex_vk->sig, stage, &event); bool need_trans = tex_vk->current_layout != newLayout || tex_vk->current_access != newAccess; // Transitioning to VK_IMAGE_LAYOUT_UNDEFINED is a pseudo-operation // that for us means we don't need to perform the actual transition if (need_trans && newLayout != VK_IMAGE_LAYOUT_UNDEFINED) { switch (type) { case VK_WAIT_NONE: // No synchronization required, so we can safely transition out of // VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT imgBarrier.srcAccessMask = 0; vkCmdPipelineBarrier(cmd->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, stage, 0, 0, NULL, 0, NULL, 1, &imgBarrier); break; case VK_WAIT_BARRIER: // Regular pipeline barrier is required vkCmdPipelineBarrier(cmd->buf, tex_vk->sig_stage, stage, 0, 0, NULL, 0, NULL, 1, &imgBarrier); break; case VK_WAIT_EVENT: // We can/should use the VkEvent for synchronization vkCmdWaitEvents(cmd->buf, 1, &event, tex_vk->sig_stage, stage, 0, NULL, 0, NULL, 1, &imgBarrier); break; } } tex_vk->current_layout = newLayout; tex_vk->current_access = newAccess; } static void tex_signal(const struct pl_gpu *gpu, struct vk_cmd *cmd, const struct pl_tex *tex, VkPipelineStageFlags stage) { struct pl_tex_vk *tex_vk = tex->priv; struct vk_ctx *vk = pl_vk_get(gpu); pl_assert(!tex_vk->sig); tex_vk->sig = vk_cmd_signal(vk, cmd, stage); tex_vk->sig_stage = stage; } static void vk_tex_destroy(const struct pl_gpu *gpu, struct pl_tex *tex) { if (!tex) return; struct vk_ctx *vk = pl_vk_get(gpu); struct pl_tex_vk *tex_vk = tex->priv; struct pl_vk *p = gpu->priv; pl_buf_pool_uninit(gpu, &tex_vk->texel_write); pl_buf_pool_uninit(gpu, &tex_vk->texel_read); pl_buf_pool_uninit(gpu, &tex_vk->pbo_write); pl_buf_pool_uninit(gpu, &tex_vk->pbo_read); vk_signal_destroy(vk, &tex_vk->sig); vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, VK_ALLOC); vkDestroySampler(vk->dev, tex_vk->sampler, VK_ALLOC); vkDestroyImageView(vk->dev, tex_vk->view, VK_ALLOC); if (!tex_vk->external_img) { vkDestroyImage(vk->dev, tex_vk->img, VK_ALLOC); vk_free_memslice(p->alloc, tex_vk->mem); } talloc_free(tex); } MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct pl_tex); static const VkFilter filters[] = { [PL_TEX_SAMPLE_NEAREST] = VK_FILTER_NEAREST, [PL_TEX_SAMPLE_LINEAR] = VK_FILTER_LINEAR, }; // Initializes non-VkImage values like the image view, samplers, etc. 
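// This helper is shared by vk_tex_create() and by external images wrapped
// via pl_vk_wrap_swimg() (see below), which is why it deliberately does not
// touch the VkImage itself or its backing memory.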
static bool vk_init_image(const struct pl_gpu *gpu, const struct pl_tex *tex) { struct vk_ctx *vk = pl_vk_get(gpu); const struct pl_tex_params *params = &tex->params; struct pl_tex_vk *tex_vk = tex->priv; pl_assert(tex_vk->img); tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED; tex_vk->current_access = 0; tex_vk->transfer_queue = GRAPHICS; // Always use the transfer pool if available, for efficiency if ((params->host_writable || params->host_readable) && vk->pool_transfer) tex_vk->transfer_queue = TRANSFER; // For emulated formats: force usage of the compute queue, because we // can't properly track cross-queue dependencies for buffers (yet?) if (params->format->emulated) tex_vk->transfer_queue = COMPUTE; bool ret = false; VkRenderPass dummyPass = NULL; if (params->sampleable || params->renderable) { static const VkImageViewType viewType[] = { [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D, [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D, [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D, }; const struct vk_format *fmt = params->format->priv; VkImageViewCreateInfo vinfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = tex_vk->img, .viewType = viewType[tex_vk->type], .format = params->format->emulated ? fmt->emufmt : fmt->ifmt, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .levelCount = 1, .layerCount = 1, }, }; VK(vkCreateImageView(vk->dev, &vinfo, VK_ALLOC, &tex_vk->view)); } if (params->sampleable) { static const VkSamplerAddressMode modes[] = { [PL_TEX_ADDRESS_CLAMP] = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, [PL_TEX_ADDRESS_REPEAT] = VK_SAMPLER_ADDRESS_MODE_REPEAT, [PL_TEX_ADDRESS_MIRROR] = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, }; VkSamplerCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .magFilter = filters[params->sample_mode], .minFilter = filters[params->sample_mode], .addressModeU = modes[params->address_mode], .addressModeV = modes[params->address_mode], .addressModeW = modes[params->address_mode], .maxAnisotropy = 1.0, }; VK(vkCreateSampler(vk->dev, &sinfo, VK_ALLOC, &tex_vk->sampler)); } if (params->renderable) { // Framebuffers need to be created against a specific render pass // layout, so we need to temporarily create a skeleton/dummy render // pass for vulkan to figure out the compatibility VK(vk_create_render_pass(vk->dev, params->format, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &dummyPass)); VkFramebufferCreateInfo finfo = { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .renderPass = dummyPass, .attachmentCount = 1, .pAttachments = &tex_vk->view, .width = tex->params.w, .height = tex->params.h, .layers = 1, }; if (finfo.width > vk->limits.maxFramebufferWidth || finfo.height > vk->limits.maxFramebufferHeight) { PL_ERR(gpu, "Framebuffer of size %dx%d exceeds the maximum allowed " "dimensions: %dx%d", finfo.width, finfo.height, vk->limits.maxFramebufferWidth, vk->limits.maxFramebufferHeight); goto error; } VK(vkCreateFramebuffer(vk->dev, &finfo, VK_ALLOC, &tex_vk->framebuffer)); } ret = true; error: vkDestroyRenderPass(vk->dev, dummyPass, VK_ALLOC); return ret; } static const struct pl_tex *vk_tex_create(const struct pl_gpu *gpu, const struct pl_tex_params *params) { struct vk_ctx *vk = pl_vk_get(gpu); struct pl_vk *p = gpu->priv; struct pl_tex *tex = talloc_zero(NULL, struct pl_tex); tex->params = *params; tex->params.initial_data = NULL; struct pl_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct pl_tex_vk); const struct vk_format *fmt = params->format->priv; switch 
(pl_tex_params_dimension(*params)) { case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break; case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break; case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break; default: abort(); } if (params->format->emulated) { tex_vk->texel_fmt = pl_find_fmt(gpu, params->format->type, 1, 0, params->format->host_bits[0], PL_FMT_CAP_TEXEL_UNIFORM); if (!tex_vk->texel_fmt) { PL_ERR(gpu, "Failed picking texel format for emulated texture!"); goto error; } // Our format emulation requires storage image support. In order to // make a bunch of checks happy, just mark it off as storable (and also // enable VK_IMAGE_USAGE_STORAGE_BIT, which we do below) tex->params.storable = true; } VkImageUsageFlags usage = 0; if (params->sampleable) usage |= VK_IMAGE_USAGE_SAMPLED_BIT; if (params->renderable) usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; if (params->storable || params->format->emulated) usage |= VK_IMAGE_USAGE_STORAGE_BIT; if (params->host_readable || params->blit_src) usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (params->host_writable || params->blit_dst || params->initial_data) usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; // Double-check physical image format limits and fail if invalid VkImageFormatProperties iprop; VkFormat ifmt = params->format->emulated ? fmt->emufmt : fmt->ifmt; VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd, ifmt, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0, &iprop); if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) { return NULL; } else { VK_ASSERT(res, "Querying image format properties"); } VkExtent3D max = iprop.maxExtent; if (params->w > max.width || params->h > max.height || params->d > max.depth) { PL_ERR(gpu, "Requested image size %dx%dx%d exceeds the maximum allowed " "dimensions %dx%dx%d for vulkan image format %x", params->w, params->h, params->d, max.width, max.height, max.depth, (unsigned) fmt->ifmt); return NULL; } // FIXME: Since we can't keep track of queue family ownership properly, // and we don't know in advance what types of queue families this image // will belong to, we're forced to share all of our images between all // command pools. uint32_t qfs[3] = {0}; for (int i = 0; i < vk->num_pools; i++) qfs[i] = vk->pools[i]->qf; VkImageCreateInfo iinfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = tex_vk->type, .format = ifmt, .extent = (VkExtent3D) { .width = params->w, .height = PL_MAX(1, params->h), .depth = PL_MAX(1, params->d) }, .mipLevels = 1, .arrayLayers = 1, .samples = VK_SAMPLE_COUNT_1_BIT, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = usage, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .sharingMode = vk->num_pools > 1 ? 
VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = vk->num_pools, .pQueueFamilyIndices = qfs, }; VK(vkCreateImage(vk->dev, &iinfo, VK_ALLOC, &tex_vk->img)); VkMemoryPropertyFlags memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; VkMemoryRequirements reqs; vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs); struct vk_memslice *mem = &tex_vk->mem; if (!vk_malloc_generic(p->alloc, reqs, memFlags, mem)) goto error; VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset)); if (!vk_init_image(gpu, tex)) goto error; if (params->initial_data) { struct pl_tex_transfer_params ul_params = { .tex = tex, .ptr = (void *) params->initial_data, .rc = { 0, 0, 0, params->w, params->h, params->d }, .stride_w = params->w, .stride_h = params->h, }; // Since we re-use GPU helpers which require writable images, just fake it bool writable = tex->params.host_writable; tex->params.host_writable = true; if (!pl_tex_upload(gpu, &ul_params)) goto error; tex->params.host_writable = writable; } return tex; error: vk_tex_destroy(gpu, tex); return NULL; } static void vk_tex_invalidate(const struct pl_gpu *gpu, const struct pl_tex *tex) { struct pl_tex_vk *tex_vk = tex->priv; tex_vk->may_invalidate = true; } static void vk_tex_clear(const struct pl_gpu *gpu, const struct pl_tex *tex, const float color[4]) { struct pl_tex_vk *tex_vk = tex->priv; struct vk_cmd *cmd = vk_require_cmd(gpu, GRAPHICS); if (!cmd) return; tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); VkClearColorValue clearColor = {0}; for (int c = 0; c < 4; c++) clearColor.float32[c] = color[c]; static const VkImageSubresourceRange range = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .levelCount = 1, .layerCount = 1, }; vkCmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->current_layout, &clearColor, 1, &range); tex_signal(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT); } static void vk_tex_blit(const struct pl_gpu *gpu, const struct pl_tex *dst, const struct pl_tex *src, struct pl_rect3d dst_rc, struct pl_rect3d src_rc) { struct pl_tex_vk *src_vk = src->priv; struct pl_tex_vk *dst_vk = dst->priv; struct vk_cmd *cmd = vk_require_cmd(gpu, GRAPHICS); if (!cmd) return; tex_barrier(gpu, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); tex_barrier(gpu, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); static const VkImageSubresourceLayers layers = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .layerCount = 1, }; // When the blit operation doesn't require scaling, we can use the more // efficient vkCmdCopyImage instead of vkCmdBlitImage if (pl_rect3d_eq(src_rc, dst_rc)) { pl_rect3d_normalize(&src_rc); pl_rect3d_normalize(&dst_rc); VkImageCopy region = { .srcSubresource = layers, .dstSubresource = layers, .srcOffset = {src_rc.x0, src_rc.y0, src_rc.z0}, .dstOffset = {dst_rc.x0, dst_rc.y0, dst_rc.z0}, .extent = { pl_rect_w(src_rc), pl_rect_h(src_rc), pl_rect_d(src_rc), }, }; vkCmdCopyImage(cmd->buf, src_vk->img, src_vk->current_layout, dst_vk->img, dst_vk->current_layout, 1, &region); } else { VkImageBlit region = { .srcSubresource = layers, .dstSubresource = layers, .srcOffsets = {{src_rc.x0, src_rc.y0, src_rc.z0}, {src_rc.x1, src_rc.y1, src_rc.z1}}, .dstOffsets = {{dst_rc.x0, dst_rc.y0, dst_rc.z0}, {dst_rc.x1, dst_rc.y1, dst_rc.z1}}, }; vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout, dst_vk->img, dst_vk->current_layout, 
1, &region, filters[src->params.sample_mode]); } tex_signal(gpu, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT); tex_signal(gpu, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT); } const struct pl_tex *pl_vk_wrap_swimg(const struct pl_gpu *gpu, VkImage vkimg, VkSwapchainCreateInfoKHR info) { struct pl_tex *tex = NULL; const struct pl_fmt *format = NULL; for (int i = 0; i < gpu->num_formats; i++) { const struct vk_format *fmt = gpu->formats[i]->priv; if (fmt->ifmt == info.imageFormat) { format = gpu->formats[i]; break; } } if (!format) { PL_ERR(gpu, "Could not find pl_fmt suitable for wrapped swapchain image " "with surface format 0x%x\n", (unsigned) info.imageFormat); goto error; } tex = talloc_zero(NULL, struct pl_tex); tex->params = (struct pl_tex_params) { .format = format, .w = info.imageExtent.width, .h = info.imageExtent.height, .sampleable = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT), .renderable = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT), .storable = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT), .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT), .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT), .host_writable = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT), .host_readable = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT), }; struct pl_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct pl_tex_vk); tex_vk->type = VK_IMAGE_TYPE_2D; tex_vk->external_img = true; tex_vk->img = vkimg; if (!vk_init_image(gpu, tex)) goto error; return tex; error: vk_tex_destroy(gpu, tex); return NULL; } // For pl_buf.priv struct pl_buf_vk { struct vk_bufslice slice; int refcount; // 1 = object allocated but not in use, > 1 = in use bool needs_flush; enum queue_type update_queue; VkBufferView view; // for texel buffers // "current" metadata, can change during the course of execution VkPipelineStageFlags current_stage; VkAccessFlags current_access; }; #define PL_VK_BUF_VERTEX PL_BUF_PRIVATE static void vk_buf_deref(const struct pl_gpu *gpu, struct pl_buf *buf) { if (!buf) return; struct vk_ctx *vk = pl_vk_get(gpu); struct pl_buf_vk *buf_vk = buf->priv; struct pl_vk *p = gpu->priv; if (--buf_vk->refcount == 0) { vkDestroyBufferView(vk->dev, buf_vk->view, VK_ALLOC); vk_free_memslice(p->alloc, buf_vk->slice.mem); talloc_free(buf); } } // visible_writes: whether or not this buffer access constitutes a write to the // buffer that modifies the contents in a way that the host should be able to // see. This includes any synchronization necessary to ensure the writes are // made visible to the host. 
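// Example (sketch): vk_tex_download() passes visible_writes=true for the
// destination buffer, so a host_readable/host_mapped buffer picks up an extra
// VK_ACCESS_HOST_READ_BIT dependency and the downloaded data becomes visible
// to vk_buf_read(); internal staging copies pass false and skip that barrier.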
static void buf_barrier(const struct pl_gpu *gpu, struct vk_cmd *cmd, const struct pl_buf *buf, VkPipelineStageFlags newStage, VkAccessFlags newAccess, int offset, size_t size, bool visible_writes) { struct pl_buf_vk *buf_vk = buf->priv; VkBufferMemoryBarrier buffBarrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcAccessMask = buf_vk->current_access, .dstAccessMask = newAccess, .buffer = buf_vk->slice.buf, .offset = offset, .size = size, }; if (buf_vk->needs_flush || buf->params.host_mapped) { buffBarrier.srcAccessMask |= VK_ACCESS_HOST_WRITE_BIT; buf_vk->current_stage |= VK_PIPELINE_STAGE_HOST_BIT; buf_vk->needs_flush = false; } if (visible_writes && (buf->params.host_readable || buf->params.host_mapped)) { buffBarrier.dstAccessMask |= VK_ACCESS_HOST_READ_BIT; newStage |= VK_PIPELINE_STAGE_HOST_BIT; } if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) { vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0, 0, NULL, 1, &buffBarrier, 0, NULL); } buf_vk->current_stage = newStage; buf_vk->current_access = newAccess; buf_vk->refcount++; vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, gpu, buf); } #define vk_buf_destroy vk_buf_deref MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct pl_buf); static void vk_buf_write(const struct pl_gpu *gpu, const struct pl_buf *buf, size_t offset, const void *data, size_t size) { struct pl_buf_vk *buf_vk = buf->priv; // For host-mapped buffers, we can just directly memcpy the buffer contents. // Otherwise, we can update the buffer from the GPU using a command buffer. if (buf_vk->slice.data) { uintptr_t addr = (uintptr_t) buf_vk->slice.data + (ptrdiff_t) offset; memcpy((void *) addr, data, size); buf_vk->needs_flush = true; } else { struct vk_cmd *cmd = vk_require_cmd(gpu, buf_vk->update_queue); if (!cmd) { PL_ERR(gpu, "Failed updating buffer!"); return; } buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, offset, size, true); VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset; vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data); } } static bool vk_buf_read(const struct pl_gpu *gpu, const struct pl_buf *buf, size_t offset, void *dest, size_t size) { struct pl_buf_vk *buf_vk = buf->priv; pl_assert(buf_vk->slice.data); uintptr_t addr = (uintptr_t) buf_vk->slice.data + (ptrdiff_t) offset; memcpy(dest, (void *) addr, size); return true; } static const struct pl_buf *vk_buf_create(const struct pl_gpu *gpu, const struct pl_buf_params *params) { struct vk_ctx *vk = pl_vk_get(gpu); struct pl_vk *p = gpu->priv; struct pl_buf *buf = talloc_zero(NULL, struct pl_buf); buf->params = *params; buf->params.initial_data = NULL; struct pl_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct pl_buf_vk); buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; buf_vk->current_access = 0; buf_vk->refcount = 1; VkBufferUsageFlags bufFlags = 0; VkMemoryPropertyFlags memFlags = 0; VkDeviceSize align = 4; // alignment 4 is needed for buf_update bool is_texel = false; switch (params->type) { case PL_BUF_TEX_TRANSFER: bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; // Use TRANSFER-style updates for large enough buffers for efficiency if (params->size > 1024*1024) // 1 MB buf_vk->update_queue = TRANSFER; break; case PL_BUF_UNIFORM: bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; memFlags |= 
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; align = PL_ALIGN2(align, vk->limits.minUniformBufferOffsetAlignment); break; case PL_BUF_STORAGE: bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; align = PL_ALIGN2(align, vk->limits.minStorageBufferOffsetAlignment); buf_vk->update_queue = COMPUTE; break; case PL_BUF_TEXEL_UNIFORM: // for emulated upload bufFlags |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; align = PL_ALIGN2(align, vk->limits.minTexelBufferOffsetAlignment); align = PL_ALIGN2(align, vk->limits.optimalBufferCopyOffsetAlignment); is_texel = true; break; case PL_BUF_TEXEL_STORAGE: // for emulated download bufFlags |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; align = PL_ALIGN2(align, vk->limits.minTexelBufferOffsetAlignment); align = PL_ALIGN2(align, vk->limits.optimalBufferCopyOffsetAlignment); buf_vk->update_queue = COMPUTE; is_texel = true; break; case PL_VK_BUF_VERTEX: bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; break; default: abort(); } if (params->host_writable || params->initial_data) { bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; align = PL_ALIGN2(align, vk->limits.optimalBufferCopyOffsetAlignment); } if (params->host_mapped || params->host_readable) { memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; } if (!vk_malloc_buffer(p->alloc, bufFlags, memFlags, params->size, align, &buf_vk->slice)) goto error; if (params->host_mapped) buf->data = buf_vk->slice.data; if (is_texel) { const struct vk_format *vk_fmt = params->format->priv; VkBufferViewCreateInfo vinfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, .buffer = buf_vk->slice.buf, .format = vk_fmt->ifmt, .offset = buf_vk->slice.mem.offset, .range = params->size, }; VK(vkCreateBufferView(vk->dev, &vinfo, VK_ALLOC, &buf_vk->view)); } if (params->initial_data) vk_buf_write(gpu, buf, 0, params->initial_data, params->size); return buf; error: vk_buf_destroy(gpu, buf); return NULL; } static bool vk_buf_poll(const struct pl_gpu *gpu, const struct pl_buf *buf, uint64_t timeout) { struct vk_ctx *vk = pl_vk_get(gpu); struct pl_buf_vk *buf_vk = buf->priv; if (timeout > 0) { vk_submit(gpu); vk_poll_commands(vk, timeout); } return buf_vk->refcount > 1; } static bool vk_tex_upload(const struct pl_gpu *gpu, const struct pl_tex_transfer_params *params) { struct pl_vk *p = gpu->priv; const struct pl_tex *tex = params->tex; struct pl_tex_vk *tex_vk = tex->priv; if (!params->buf) return pl_tex_upload_pbo(gpu, &tex_vk->pbo_write, params); pl_assert(params->buf); const struct pl_buf *buf = params->buf; struct pl_buf_vk *buf_vk = buf->priv; struct pl_rect3d rc = params->rc; size_t size = pl_tex_transfer_size(params); if (tex->params.format->emulated) { // Copy the buffer into a texel buffer for software texture blit purposes const struct pl_buf *tbuf; tbuf = pl_buf_pool_get(gpu, &tex_vk->texel_write, &(struct pl_buf_params) { .type = PL_BUF_TEXEL_UNIFORM, .size = size, .format = tex_vk->texel_fmt, }); if (!tbuf) { PL_ERR(gpu, "Failed creating texel buffer for emulated tex upload!"); goto error; } // Note: Make sure to run vk_require_cmd *after* pl_buf_pool_get, since // the former could imply polling which could imply submitting the // command we just required! 
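        // Rough data flow for this emulated upload path (sketch):
        //   params->buf --(vkCmdCopyBuffer)--> tbuf [PL_BUF_TEXEL_UNIFORM]
        //   tbuf --(pl_tex_upload_texel, compute)--> tex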
struct vk_cmd *cmd = vk_require_cmd(gpu, tex_vk->transfer_queue); if (!cmd) goto error; struct pl_buf_vk *tbuf_vk = tbuf->priv; VkBufferCopy region = { .srcOffset = buf_vk->slice.mem.offset + params->buf_offset, .dstOffset = tbuf_vk->slice.mem.offset, .size = size, }; buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, region.srcOffset, size, false); buf_barrier(gpu, cmd, tbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, region.dstOffset, size, false); vkCmdCopyBuffer(cmd->buf, buf_vk->slice.buf, tbuf_vk->slice.buf, 1, ®ion); struct pl_tex_transfer_params fixed = *params; fixed.buf = tbuf; fixed.buf_offset = 0; return pl_tex_upload_texel(gpu, p->dp, &fixed); } else { struct vk_cmd *cmd = vk_require_cmd(gpu, tex_vk->transfer_queue); if (!cmd) goto error; VkBufferImageCopy region = { .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset, .bufferRowLength = params->stride_w, .bufferImageHeight = params->stride_h, .imageOffset = { rc.x0, rc.y0, rc.z0 }, .imageExtent = { rc.x1, rc.y1, rc.z1 }, .imageSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .layerCount = 1, }, }; buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size, false); tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img, tex_vk->current_layout, 1, ®ion); tex_signal(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT); } return true; error: return false; } static bool vk_tex_download(const struct pl_gpu *gpu, const struct pl_tex_transfer_params *params) { struct pl_vk *p = gpu->priv; const struct pl_tex *tex = params->tex; struct pl_tex_vk *tex_vk = tex->priv; if (!params->buf) return pl_tex_download_pbo(gpu, &tex_vk->pbo_read, params); pl_assert(params->buf); const struct pl_buf *buf = params->buf; struct pl_buf_vk *buf_vk = buf->priv; struct pl_rect3d rc = params->rc; size_t size = pl_tex_transfer_size(params); if (tex->params.format->emulated) { // Blit the image into a texel storage buffer using compute shaders const struct pl_buf *tbuf; tbuf = pl_buf_pool_get(gpu, &tex_vk->texel_read, &(struct pl_buf_params) { .type = PL_BUF_TEXEL_STORAGE, .size = size, .format = tex_vk->texel_fmt, }); if (!tbuf) { PL_ERR(gpu, "Failed creating texel buffer for emulated tex download!"); goto error; } struct pl_tex_transfer_params fixed = *params; fixed.buf = tbuf; fixed.buf_offset = 0; if (!pl_tex_download_texel(gpu, p->dp, &fixed)) goto error; struct vk_cmd *cmd = vk_require_cmd(gpu, tex_vk->transfer_queue); if (!cmd) goto error; struct pl_buf_vk *tbuf_vk = tbuf->priv; VkBufferCopy region = { .srcOffset = tbuf_vk->slice.mem.offset, .dstOffset = buf_vk->slice.mem.offset + params->buf_offset, .size = size, }; buf_barrier(gpu, cmd, tbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, region.srcOffset, size, false); buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, region.dstOffset, size, true); vkCmdCopyBuffer(cmd->buf, tbuf_vk->slice.buf, buf_vk->slice.buf, 1, ®ion); } else { struct vk_cmd *cmd = vk_require_cmd(gpu, tex_vk->transfer_queue); if (!cmd) goto error; VkBufferImageCopy region = { .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset, .bufferRowLength = params->stride_w, .bufferImageHeight = params->stride_h, .imageOffset = { rc.x0, rc.y0, rc.z0 }, .imageExtent = { rc.x1, rc.y1, rc.z1 }, .imageSubresource = { 
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .layerCount = 1, }, }; buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, region.bufferOffset, size, true); tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); vkCmdCopyImageToBuffer(cmd->buf, tex_vk->img, tex_vk->current_layout, buf_vk->slice.buf, 1, ®ion); tex_signal(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT); } return true; error: return false; } static int vk_desc_namespace(const struct pl_gpu *gpu, enum pl_desc_type type) { return 0; } // For pl_pass.priv struct pl_pass_vk { // Pipeline / render pass VkPipeline pipe; VkPipelineLayout pipeLayout; VkRenderPass renderPass; VkImageLayout initialLayout; VkImageLayout finalLayout; // Descriptor set (bindings) VkDescriptorSetLayout dsLayout; VkDescriptorPool dsPool; // To keep track of which descriptor sets are and aren't available, we // allocate a fixed number and use a bitmask of all available sets. VkDescriptorSet dss[16]; uint16_t dmask; // Vertex buffers (vertices) struct pl_buf_pool vbo; // For updating VkWriteDescriptorSet *dswrite; VkDescriptorImageInfo *dsiinfo; VkDescriptorBufferInfo *dsbinfo; }; static void vk_pass_destroy(const struct pl_gpu *gpu, struct pl_pass *pass) { if (!pass) return; struct vk_ctx *vk = pl_vk_get(gpu); struct pl_pass_vk *pass_vk = pass->priv; pl_buf_pool_uninit(gpu, &pass_vk->vbo); vkDestroyPipeline(vk->dev, pass_vk->pipe, VK_ALLOC); vkDestroyRenderPass(vk->dev, pass_vk->renderPass, VK_ALLOC); vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, VK_ALLOC); vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, VK_ALLOC); vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, VK_ALLOC); talloc_free(pass); } MAKE_LAZY_DESTRUCTOR(vk_pass_destroy, struct pl_pass); static const VkDescriptorType dsType[] = { [PL_DESC_SAMPLED_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, [PL_DESC_STORAGE_IMG] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, [PL_DESC_BUF_UNIFORM] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, [PL_DESC_BUF_STORAGE] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, [PL_DESC_BUF_TEXEL_UNIFORM] = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, [PL_DESC_BUF_TEXEL_STORAGE] = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, }; static const char vk_cache_magic[4] = {'R','A','V','K'}; static const int vk_cache_version = 2; struct vk_cache_header { char magic[sizeof(vk_cache_magic)]; int cache_version; char compiler[SPIRV_NAME_MAX_LEN]; int compiler_version; size_t vert_spirv_len; size_t frag_spirv_len; size_t comp_spirv_len; size_t pipecache_len; }; static bool vk_use_cached_program(const struct pl_pass_params *params, const struct spirv_compiler *spirv, struct bstr *vert_spirv, struct bstr *frag_spirv, struct bstr *comp_spirv, struct bstr *pipecache) { struct bstr cache = { .start = (void*) params->cached_program, .len =params->cached_program_len, }; if (cache.len < sizeof(struct vk_cache_header)) return false; struct vk_cache_header *header = (struct vk_cache_header *)cache.start; cache = bstr_cut(cache, sizeof(*header)); if (strncmp(header->magic, vk_cache_magic, sizeof(vk_cache_magic)) != 0) return false; if (header->cache_version != vk_cache_version) return false; if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0) return false; if (header->compiler_version != spirv->compiler_version) return false; #define GET(ptr) \ if (cache.len < header->ptr##_len) \ return false; \ *ptr = bstr_splice(cache, 0, header->ptr##_len); \ cache = bstr_cut(cache, ptr->len); GET(vert_spirv); 
GET(frag_spirv); GET(comp_spirv); GET(pipecache); return true; } static VkResult vk_compile_glsl(const struct pl_gpu *gpu, void *tactx, enum glsl_shader_stage type, const char *glsl, struct bstr *spirv) { struct pl_vk *p = gpu->priv; static const char *shader_names[] = { [GLSL_SHADER_VERTEX] = "vertex", [GLSL_SHADER_FRAGMENT] = "fragment", [GLSL_SHADER_COMPUTE] = "compute", }; PL_DEBUG(gpu, "%s shader source:", shader_names[type]); pl_msg_source(gpu->ctx, PL_LOG_DEBUG, glsl); if (!p->spirv->impl->compile_glsl(p->spirv, tactx, type, glsl, spirv)) { pl_msg_source(gpu->ctx, PL_LOG_ERR, glsl); return VK_ERROR_INITIALIZATION_FAILED; } return VK_SUCCESS; } static const VkShaderStageFlags stageFlags[] = { [PL_PASS_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT, [PL_PASS_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT, }; static const struct pl_pass *vk_pass_create(const struct pl_gpu *gpu, const struct pl_pass_params *params) { struct vk_ctx *vk = pl_vk_get(gpu); struct pl_vk *p = gpu->priv; bool success = false; struct pl_pass *pass = talloc_zero(NULL, struct pl_pass); pass->params = pl_pass_params_copy(pass, params); struct pl_pass_vk *pass_vk = pass->priv = talloc_zero(pass, struct pl_pass_vk); pass_vk->dmask = -1; // all descriptors available int num_desc = params->num_descriptors; pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_desc); pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_desc); pass_vk->dsbinfo = talloc_array(pass, VkDescriptorBufferInfo, num_desc); // temporary allocations/objects void *tmp = talloc_new(NULL); VkPipelineCache pipeCache = NULL; VkShaderModule vert_shader = NULL; VkShaderModule frag_shader = NULL; VkShaderModule comp_shader = NULL; #define NUM_DS (PL_ARRAY_SIZE(pass_vk->dss)) static int dsSize[PL_DESC_TYPE_COUNT] = {0}; VkDescriptorSetLayoutBinding *bindings = talloc_array(tmp, VkDescriptorSetLayoutBinding, num_desc); for (int i = 0; i < num_desc; i++) { struct pl_desc *desc = ¶ms->descriptors[i]; dsSize[desc->type]++; bindings[i] = (VkDescriptorSetLayoutBinding) { .binding = desc->binding, .descriptorType = dsType[desc->type], .descriptorCount = 1, .stageFlags = stageFlags[params->type], }; } VkDescriptorPoolSize *dsPoolSizes = NULL; int poolSizeCount = 0; for (enum pl_desc_type t = 0; t < PL_DESC_TYPE_COUNT; t++) { if (dsSize[t] > 0) { VkDescriptorPoolSize dssize = { .type = dsType[t], .descriptorCount = dsSize[t] * NUM_DS, }; TARRAY_APPEND(tmp, dsPoolSizes, poolSizeCount, dssize); } } if (poolSizeCount) { VkDescriptorPoolCreateInfo pinfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .maxSets = NUM_DS, .pPoolSizes = dsPoolSizes, .poolSizeCount = poolSizeCount, }; VK(vkCreateDescriptorPool(vk->dev, &pinfo, VK_ALLOC, &pass_vk->dsPool)); } VkDescriptorSetLayoutCreateInfo dinfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pBindings = bindings, .bindingCount = num_desc, }; VK(vkCreateDescriptorSetLayout(vk->dev, &dinfo, VK_ALLOC, &pass_vk->dsLayout)); VkDescriptorSetLayout layouts[NUM_DS]; for (int i = 0; i < NUM_DS; i++) layouts[i] = pass_vk->dsLayout; if (pass_vk->dsPool) { VkDescriptorSetAllocateInfo ainfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = pass_vk->dsPool, .descriptorSetCount = NUM_DS, .pSetLayouts = layouts, }; VK(vkAllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss)); } VkPipelineLayoutCreateInfo linfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, .pSetLayouts = &pass_vk->dsLayout, .pushConstantRangeCount = 
params->push_constants_size ? 1 : 0, .pPushConstantRanges = &(VkPushConstantRange){ .stageFlags = stageFlags[params->type], .offset = 0, .size = params->push_constants_size, }, }; VK(vkCreatePipelineLayout(vk->dev, &linfo, VK_ALLOC, &pass_vk->pipeLayout)); struct bstr vert = {0}, frag = {0}, comp = {0}, pipecache = {0}; if (vk_use_cached_program(params, p->spirv, &vert, &frag, &comp, &pipecache)) { PL_DEBUG(gpu, "Using cached SPIR-V and VkPipeline"); } else { pipecache.len = 0; switch (params->type) { case PL_PASS_RASTER: VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_VERTEX, params->vertex_shader, &vert)); VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_FRAGMENT, params->glsl_shader, &frag)); comp.len = 0; break; case PL_PASS_COMPUTE: VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_COMPUTE, params->glsl_shader, &comp)); frag.len = 0; vert.len = 0; break; default: abort(); } } VkPipelineCacheCreateInfo pcinfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, .pInitialData = pipecache.start, .initialDataSize = pipecache.len, }; VK(vkCreatePipelineCache(vk->dev, &pcinfo, VK_ALLOC, &pipeCache)); VkShaderModuleCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, }; switch (params->type) { case PL_PASS_RASTER: { sinfo.pCode = (uint32_t *) vert.start; sinfo.codeSize = vert.len; VK(vkCreateShaderModule(vk->dev, &sinfo, VK_ALLOC, &vert_shader)); sinfo.pCode = (uint32_t *) frag.start; sinfo.codeSize = frag.len; VK(vkCreateShaderModule(vk->dev, &sinfo, VK_ALLOC, &frag_shader)); VkVertexInputAttributeDescription *attrs = talloc_array(tmp, VkVertexInputAttributeDescription, params->num_vertex_attribs); for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_vertex_attrib *va = ¶ms->vertex_attribs[i]; const struct vk_format *fmt_vk = va->fmt->priv; attrs[i] = (VkVertexInputAttributeDescription) { .binding = 0, .location = va->location, .offset = va->offset, .format = fmt_vk->ifmt, }; } pass_vk->finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; // Figure out which case we should try and optimize for based on some // dumb heuristics. Extremely naive, but good enough for most cases. struct pl_tex_params texparams = params->target_dummy.params; if (texparams.storable) pass_vk->finalLayout = VK_IMAGE_LAYOUT_GENERAL; if (texparams.blit_src || texparams.host_readable) pass_vk->finalLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; if (texparams.blit_dst || texparams.host_writable) pass_vk->finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; if (texparams.sampleable) pass_vk->finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; struct pl_tex_vk *target_vk = params->target_dummy.priv; if (target_vk) { // If we have a real texture as the target dummy, we can set the // initial layout based on what the texture is actually in at the // moment. pass_vk->initialLayout = target_vk->current_layout; } else { // Assume we're ping-ponging between a render pass and some other // operation. This is the most likely scenario, or rather, the only // one we can really optimize for. 
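            // (e.g. a sampleable target that gets sampled by a later pass and
            // then rendered to again will most likely still be in the same
            // finalLayout we transitioned it into the previous time around)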
pass_vk->initialLayout = pass_vk->finalLayout; } VkAttachmentLoadOp loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; // If we're blending, then we need to explicitly load the previous // contents of the color attachment if (pass->params.blend_params) loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; // If we're ignoring the FBO, we don't need to load or transition if (!pass->params.load_target) { pass_vk->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; } VK(vk_create_render_pass(vk->dev, texparams.format, loadOp, pass_vk->initialLayout, pass_vk->finalLayout, &pass_vk->renderPass)); static const VkBlendFactor blendFactors[] = { [PL_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO, [PL_BLEND_ONE] = VK_BLEND_FACTOR_ONE, [PL_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA, [PL_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, }; VkPipelineColorBlendAttachmentState blendState = { .colorBlendOp = VK_BLEND_OP_ADD, .alphaBlendOp = VK_BLEND_OP_ADD, .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, }; const struct pl_blend_params *blend = params->blend_params; if (blend) { blendState.blendEnable = true; blendState.srcColorBlendFactor = blendFactors[blend->src_rgb]; blendState.dstColorBlendFactor = blendFactors[blend->dst_rgb]; blendState.srcAlphaBlendFactor = blendFactors[blend->src_alpha]; blendState.dstAlphaBlendFactor = blendFactors[blend->dst_alpha]; } static const VkPrimitiveTopology topologies[] = { [PL_PRIM_TRIANGLE_LIST] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, [PL_PRIM_TRIANGLE_STRIP] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, [PL_PRIM_TRIANGLE_FAN] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN, }; VkGraphicsPipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .stageCount = 2, .pStages = (VkPipelineShaderStageCreateInfo[]) { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_VERTEX_BIT, .module = vert_shader, .pName = "main", }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT_BIT, .module = frag_shader, .pName = "main", } }, .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 1, .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) { .binding = 0, .stride = params->vertex_stride, .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, }, .vertexAttributeDescriptionCount = params->num_vertex_attribs, .pVertexAttributeDescriptions = attrs, }, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = topologies[params->vertex_type], }, .pViewportState = &(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, .viewportCount = 1, .scissorCount = 1, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .polygonMode = VK_POLYGON_MODE_FILL, .cullMode = VK_CULL_MODE_NONE, .lineWidth = 1.0f, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, }, .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, .pAttachments = &blendState, }, .pDynamicState = 
&(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .dynamicStateCount = 2, .pDynamicStates = (VkDynamicState[]){ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, }, }, .layout = pass_vk->pipeLayout, .renderPass = pass_vk->renderPass, }; VK(vkCreateGraphicsPipelines(vk->dev, pipeCache, 1, &cinfo, VK_ALLOC, &pass_vk->pipe)); break; } case PL_PASS_COMPUTE: { sinfo.pCode = (uint32_t *)comp.start; sinfo.codeSize = comp.len; VK(vkCreateShaderModule(vk->dev, &sinfo, VK_ALLOC, &comp_shader)); VkComputePipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = comp_shader, .pName = "main", }, .layout = pass_vk->pipeLayout, }; VK(vkCreateComputePipelines(vk->dev, pipeCache, 1, &cinfo, VK_ALLOC, &pass_vk->pipe)); break; } default: abort(); } // Update params->cached_program struct bstr cache = {0}; VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, NULL)); cache.start = talloc_size(tmp, cache.len); VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, cache.start)); struct vk_cache_header header = { .cache_version = vk_cache_version, .compiler_version = p->spirv->compiler_version, .vert_spirv_len = vert.len, .frag_spirv_len = frag.len, .comp_spirv_len = comp.len, .pipecache_len = cache.len, }; PL_DEBUG(vk, "Pass statistics: size %zu, SPIR-V: vert %zu frag %zu comp %zu", cache.len, vert.len, frag.len, comp.len); for (int i = 0; i < PL_ARRAY_SIZE(header.magic); i++) header.magic[i] = vk_cache_magic[i]; for (int i = 0; i < sizeof(p->spirv->name); i++) header.compiler[i] = p->spirv->name[i]; struct bstr prog = {0}; bstr_xappend(pass, &prog, (struct bstr){ (char *) &header, sizeof(header) }); bstr_xappend(pass, &prog, vert); bstr_xappend(pass, &prog, frag); bstr_xappend(pass, &prog, comp); bstr_xappend(pass, &prog, cache); pass->params.cached_program = prog.start; pass->params.cached_program_len = prog.len; success = true; error: if (!success) { vk_pass_destroy(gpu, pass); pass = NULL; } #undef NUM_DS vkDestroyShaderModule(vk->dev, vert_shader, VK_ALLOC); vkDestroyShaderModule(vk->dev, frag_shader, VK_ALLOC); vkDestroyShaderModule(vk->dev, comp_shader, VK_ALLOC); vkDestroyPipelineCache(vk->dev, pipeCache, VK_ALLOC); talloc_free(tmp); return pass; } static const VkPipelineStageFlags passStages[] = { [PL_PASS_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, [PL_PASS_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, }; static void vk_update_descriptor(const struct pl_gpu *gpu, struct vk_cmd *cmd, const struct pl_pass *pass, struct pl_desc_binding db, VkDescriptorSet ds, int idx) { struct pl_pass_vk *pass_vk = pass->priv; struct pl_desc *desc = &pass->params.descriptors[idx]; pl_assert(ds); VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx]; *wds = (VkWriteDescriptorSet) { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstSet = ds, .dstBinding = desc->binding, .descriptorCount = 1, .descriptorType = dsType[desc->type], }; VkAccessFlags access = 0; switch (desc->access) { case PL_DESC_ACCESS_READONLY: access = VK_ACCESS_SHADER_READ_BIT; break; case PL_DESC_ACCESS_WRITEONLY: access = VK_ACCESS_SHADER_WRITE_BIT; break; case PL_DESC_ACCESS_READWRITE: access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; break; default: abort(); } switch (desc->type) { case PL_DESC_SAMPLED_TEX: { const struct pl_tex *tex = db.object; struct pl_tex_vk *tex_vk = tex->priv; tex_barrier(gpu, cmd, tex, 
passStages[pass->params.type],
                    VK_ACCESS_SHADER_READ_BIT,
                    VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

        VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
        *iinfo = (VkDescriptorImageInfo) {
            .sampler = tex_vk->sampler,
            .imageView = tex_vk->view,
            .imageLayout = tex_vk->current_layout,
        };

        wds->pImageInfo = iinfo;
        break;
    }
    case PL_DESC_STORAGE_IMG: {
        const struct pl_tex *tex = db.object;
        struct pl_tex_vk *tex_vk = tex->priv;
        tex_barrier(gpu, cmd, tex, passStages[pass->params.type], access,
                    VK_IMAGE_LAYOUT_GENERAL);

        VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
        *iinfo = (VkDescriptorImageInfo) {
            .imageView = tex_vk->view,
            .imageLayout = tex_vk->current_layout,
        };

        wds->pImageInfo = iinfo;
        break;
    }
    case PL_DESC_BUF_UNIFORM:
    case PL_DESC_BUF_STORAGE: {
        const struct pl_buf *buf = db.object;
        struct pl_buf_vk *buf_vk = buf->priv;
        // Whether the pass may write this buffer depends on the declared
        // descriptor access, not on the vulkan access flags
        buf_barrier(gpu, cmd, buf, passStages[pass->params.type], access,
                    buf_vk->slice.mem.offset, buf->params.size,
                    desc->access != PL_DESC_ACCESS_READONLY);

        VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx];
        *binfo = (VkDescriptorBufferInfo) {
            .buffer = buf_vk->slice.buf,
            .offset = buf_vk->slice.mem.offset,
            .range = buf->params.size,
        };

        wds->pBufferInfo = binfo;
        break;
    }
    case PL_DESC_BUF_TEXEL_UNIFORM:
    case PL_DESC_BUF_TEXEL_STORAGE: {
        const struct pl_buf *buf = db.object;
        struct pl_buf_vk *buf_vk = buf->priv;
        buf_barrier(gpu, cmd, buf, passStages[pass->params.type], access,
                    buf_vk->slice.mem.offset, buf->params.size,
                    desc->access != PL_DESC_ACCESS_READONLY);

        wds->pTexelBufferView = &buf_vk->view;
        break;
    }
    default: abort();
    }
}

static void vk_release_descriptor(const struct pl_gpu *gpu, struct vk_cmd *cmd,
                                  const struct pl_pass *pass,
                                  struct pl_desc_binding db, int idx)
{
    const struct pl_desc *desc = &pass->params.descriptors[idx];

    switch (desc->type) {
    case PL_DESC_SAMPLED_TEX:
    case PL_DESC_STORAGE_IMG: {
        const struct pl_tex *tex = db.object;
        tex_signal(gpu, cmd, tex, passStages[pass->params.type]);
        break;
    }
    default: break;
    }
}

static void set_ds(struct pl_pass_vk *pass_vk, uintptr_t dsbit)
{
    pass_vk->dmask |= dsbit;
}

static void vk_pass_run(const struct pl_gpu *gpu,
                        const struct pl_pass_run_params *params)
{
    struct vk_ctx *vk = pl_vk_get(gpu);
    const struct pl_pass *pass = params->pass;
    struct pl_pass_vk *pass_vk = pass->priv;

    static const enum queue_type types[] = {
        [PL_PASS_RASTER]  = GRAPHICS,
        [PL_PASS_COMPUTE] = COMPUTE,
    };

    // Wait for a free descriptor set
    while (!pass_vk->dmask) {
        PL_TRACE(gpu, "No free descriptor sets! 
...blocking (slow path)"); vk_submit(gpu); vk_poll_commands(vk, 1000000); // 1ms } struct vk_cmd *cmd = vk_require_cmd(gpu, types[pass->params.type]); if (!cmd) goto error; static const VkPipelineBindPoint bindPoint[] = { [PL_PASS_RASTER] = VK_PIPELINE_BIND_POINT_GRAPHICS, [PL_PASS_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE, }; vkCmdBindPipeline(cmd->buf, bindPoint[pass->params.type], pass_vk->pipe); VkDescriptorSet ds = NULL; for (int i = 0; i < PL_ARRAY_SIZE(pass_vk->dss); i++) { uint16_t dsbit = 1u << i; if (pass_vk->dmask & dsbit) { ds = pass_vk->dss[i]; pass_vk->dmask &= ~dsbit; // unset vk_cmd_callback(cmd, (vk_cb) set_ds, pass_vk, (void *)(uintptr_t) dsbit); break; } } for (int i = 0; i < pass->params.num_descriptors; i++) vk_update_descriptor(gpu, cmd, pass, params->desc_bindings[i], ds, i); if (pass->params.num_descriptors > 0) { vkUpdateDescriptorSets(vk->dev, pass->params.num_descriptors, pass_vk->dswrite, 0, NULL); } if (ds) { vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type], pass_vk->pipeLayout, 0, 1, &ds, 0, NULL); } if (pass->params.push_constants_size) { vkCmdPushConstants(cmd->buf, pass_vk->pipeLayout, stageFlags[pass->params.type], 0, pass->params.push_constants_size, params->push_constants); } switch (pass->params.type) { case PL_PASS_RASTER: { const struct pl_tex *tex = params->target; struct pl_tex_vk *tex_vk = tex->priv; struct pl_buf_params vparams = { .type = PL_VK_BUF_VERTEX, .size = params->vertex_count * pass->params.vertex_stride, .host_writable = true, }; const struct pl_buf *buf = pl_buf_pool_get(gpu, &pass_vk->vbo, &vparams); if (!buf) { PL_ERR(gpu, "Failed allocating vertex buffer!"); goto error; } struct pl_buf_vk *buf_vk = buf->priv; vk_buf_write(gpu, buf, 0, params->vertex_data, vparams.size); buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, buf_vk->slice.mem.offset, vparams.size, false); vkCmdBindVertexBuffers(cmd->buf, 0, 1, &buf_vk->slice.buf, &buf_vk->slice.mem.offset); tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, pass_vk->initialLayout); VkViewport viewport = { .x = params->viewport.x0, .y = params->viewport.y0, .width = pl_rect_w(params->viewport), .height = pl_rect_h(params->viewport), }; VkRect2D scissor = { .offset = {params->scissors.x0, params->scissors.y0}, .extent = {pl_rect_w(params->scissors), pl_rect_h(params->scissors)}, }; vkCmdSetViewport(cmd->buf, 0, 1, &viewport); vkCmdSetScissor(cmd->buf, 0, 1, &scissor); VkRenderPassBeginInfo binfo = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = pass_vk->renderPass, .framebuffer = tex_vk->framebuffer, .renderArea = (VkRect2D){{0, 0}, {tex->params.w, tex->params.h}}, }; vkCmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE); vkCmdDraw(cmd->buf, params->vertex_count, 1, 0, 0); vkCmdEndRenderPass(cmd->buf); // The renderPass implicitly transitions the texture to this layout tex_vk->current_layout = pass_vk->finalLayout; tex_signal(gpu, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); break; } case PL_PASS_COMPUTE: vkCmdDispatch(cmd->buf, params->compute_groups[0], params->compute_groups[1], params->compute_groups[2]); break; default: abort(); }; for (int i = 0; i < pass->params.num_descriptors; i++) vk_release_descriptor(gpu, cmd, pass, params->desc_bindings[i], i); // flush the work so far into its own command buffer, for better // intra-frame granularity vk_submit(gpu); error: return; } static void vk_gpu_flush(const struct pl_gpu 
*gpu)
{
    struct vk_ctx *vk = pl_vk_get(gpu);
    vk_submit(gpu);
    vk_flush_commands(vk);
}

struct vk_cmd *pl_vk_finish_frame(const struct pl_gpu *gpu,
                                  const struct pl_tex *tex)
{
    struct pl_vk *p = gpu->priv;
    struct vk_cmd *cmd = vk_require_cmd(gpu, GRAPHICS);
    if (!cmd)
        return NULL;

    struct pl_tex_vk *tex_vk = tex->priv;
    pl_assert(tex_vk->external_img);

    tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                VK_ACCESS_MEMORY_READ_BIT, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR);

    // Return this directly instead of going through vk_submit
    p->cmd = NULL;
    return cmd;
}

static struct pl_gpu_fns pl_fns_vk = {
    .destroy                = vk_destroy_ra,
    .tex_create             = vk_tex_create,
    .tex_destroy            = vk_tex_destroy_lazy,
    .tex_invalidate         = vk_tex_invalidate,
    .tex_clear              = vk_tex_clear,
    .tex_blit               = vk_tex_blit,
    .tex_upload             = vk_tex_upload,
    .tex_download           = vk_tex_download,
    .buf_create             = vk_buf_create,
    .buf_destroy            = vk_buf_destroy_lazy,
    .buf_write              = vk_buf_write,
    .buf_read               = vk_buf_read,
    .buf_poll               = vk_buf_poll,
    .buf_uniform_layout     = std140_layout,
    .buf_storage_layout     = std430_layout,
    .push_constant_layout   = std430_layout,
    .desc_namespace         = vk_desc_namespace,
    .pass_create            = vk_pass_create,
    .pass_destroy           = vk_pass_destroy_lazy,
    .pass_run               = vk_pass_run,
    .gpu_flush              = vk_gpu_flush,
};
libplacebo-0.4.0/src/vulkan/gpu.h000066400000000000000000000040401324021332500166330ustar00rootroot00000000000000/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include "../gpu.h"
#include "common.h"
#include "utils.h"

const struct pl_gpu *pl_gpu_create_vk(struct vk_ctx *vk);

// May be called on a pl_gpu of any type. Returns NULL if the pl_gpu is not
// a vulkan-backed pl_gpu.
struct vk_ctx *pl_vk_get(const struct pl_gpu *gpu);

// Allocates a pl_tex that wraps a swapchain image. The contents of the image
// will be invalidated, and access to it will only be internally synchronized.
// So the calling code should not do anything else with the VkImage.
const struct pl_tex *pl_vk_wrap_swimg(const struct pl_gpu *gpu, VkImage vkimg,
                                      VkSwapchainCreateInfoKHR info);

// Associates an external semaphore (dependency) with a pl_tex, such that this
// pl_tex will not be used by the pl_vk until the external semaphore fires.
void pl_tex_vk_external_dep(const struct pl_gpu *gpu, const struct pl_tex *tex,
                            VkSemaphore external_dep);

// This function finalizes rendering, transitions `tex` (which must be a
// wrapped swapchain image) into a layout suitable for presentation, and
// returns the resulting command buffer (or NULL on error). The caller may add
// their own semaphores to this command buffer, and must submit it afterwards.
struct vk_cmd *pl_vk_finish_frame(const struct pl_gpu *gpu,
                                  const struct pl_tex *tex);
libplacebo-0.4.0/src/vulkan/malloc.c000066400000000000000000000375711324021332500173150ustar00rootroot00000000000000/*
 * This file is part of libplacebo.
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "malloc.h" #include "command.h" #include "utils.h" // Controls the multiplication factor for new slab allocations. The new slab // will always be allocated such that the size of the slab is this factor times // the previous slab. Higher values make it grow faster. #define PLVK_HEAP_SLAB_GROWTH_RATE 4 // Controls the minimum slab size, to reduce the frequency at which very small // slabs would need to get allocated when allocating the first few buffers. // (Default: 1 MB) #define PLVK_HEAP_MINIMUM_SLAB_SIZE (1 << 20) // Controls the maximum slab size, to reduce the effect of unbounded slab // growth exhausting memory. If the application needs a single allocation // that's bigger than this value, it will be allocated directly from the // device. (Default: 512 MB) #define PLVK_HEAP_MAXIMUM_SLAB_SIZE (1 << 29) // Controls the minimum free region size, to reduce thrashing the free space // map with lots of small buffers during uninit. (Default: 1 KB) #define PLVK_HEAP_MINIMUM_REGION_SIZE (1 << 10) // Represents a region of available memory struct vk_region { size_t start; // first offset in region size_t end; // first offset *not* in region }; static inline size_t region_len(struct vk_region r) { return r.end - r.start; } // A single slab represents a contiguous region of allocated memory. Actual // allocations are served as slices of this. Slabs are organized into linked // lists, which represent individual heaps. struct vk_slab { VkDeviceMemory mem; // underlying device allocation size_t size; // total size of `slab` size_t used; // number of bytes actually in use (for GC accounting) bool dedicated; // slab is allocated specifically for one object // free space map: a sorted list of memory regions that are available struct vk_region *regions; int num_regions; // optional, depends on the memory type: VkBuffer buffer; // buffer spanning the entire slab void *data; // mapped memory corresponding to `mem` }; // Represents a single memory heap. We keep track of a vk_heap for each // combination of buffer type and memory selection parameters. This shouldn't // actually be that many in practice, because some combinations simply never // occur, and others will generally be the same for the same objects. struct vk_heap { VkBufferUsageFlags usage; // the buffer usage type (or 0) VkMemoryPropertyFlags flags; // the memory type flags (or 0) uint32_t typeBits; // the memory type index requirements (or 0) struct vk_slab **slabs; // array of slabs sorted by size int num_slabs; }; // The overall state of the allocator, which keeps track of a vk_heap for each // memory type. 
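// Heaps themselves are created lazily: find_heap() looks up an existing
// vk_heap matching the exact (usage, flags, typeBits) combination, and
// appends a new, empty heap if no match exists yet.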
struct vk_malloc { struct vk_ctx *vk; VkPhysicalDeviceMemoryProperties props; struct vk_heap *heaps; int num_heaps; }; static void slab_free(struct vk_ctx *vk, struct vk_slab *slab) { if (!slab) return; pl_assert(slab->used == 0); vkDestroyBuffer(vk->dev, slab->buffer, VK_ALLOC); // also implicitly unmaps the memory if needed vkFreeMemory(vk->dev, slab->mem, VK_ALLOC); PL_INFO(vk, "Freed slab of size %zu", (size_t) slab->size); talloc_free(slab); } static bool find_best_memtype(struct vk_malloc *ma, uint32_t typeBits, VkMemoryPropertyFlags flags, VkMemoryType *out_type, int *out_index) { struct vk_ctx *vk = ma->vk; // The vulkan spec requires memory types to be sorted in the "optimal" // order, so the first matching type we find will be the best/fastest one. for (int i = 0; i < ma->props.memoryTypeCount; i++) { // The memory type flags must include our properties if ((ma->props.memoryTypes[i].propertyFlags & flags) != flags) continue; // The memory type must be supported by the requirements (bitfield) if (typeBits && !(typeBits & (1 << i))) continue; *out_type = ma->props.memoryTypes[i]; *out_index = i; return true; } PL_ERR(vk, "Found no memory type matching property flags 0x%x and type " "bits 0x%x!", (unsigned)flags, (unsigned)typeBits); return false; } static struct vk_slab *slab_alloc(struct vk_malloc *ma, struct vk_heap *heap, size_t size) { struct vk_ctx *vk = ma->vk; struct vk_slab *slab = talloc_ptrtype(NULL, slab); *slab = (struct vk_slab) { .size = size, }; TARRAY_APPEND(slab, slab->regions, slab->num_regions, (struct vk_region) { .start = 0, .end = slab->size, }); VkMemoryAllocateInfo minfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = slab->size, }; uint32_t typeBits = heap->typeBits ? heap->typeBits : UINT32_MAX; if (heap->usage) { // FIXME: Since we can't keep track of queue family ownership properly, // and we don't know in advance what types of queue families this buffer // will belong to, we're forced to share all of our buffers between all // command pools. uint32_t qfs[3] = {0}; for (int i = 0; i < vk->num_pools; i++) qfs[i] = vk->pools[i]->qf; VkBufferCreateInfo binfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = slab->size, .usage = heap->usage, .sharingMode = vk->num_pools > 1 ? 
VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = vk->num_pools, .pQueueFamilyIndices = qfs, }; VK(vkCreateBuffer(vk->dev, &binfo, VK_ALLOC, &slab->buffer)); VkMemoryRequirements reqs; vkGetBufferMemoryRequirements(vk->dev, slab->buffer, &reqs); minfo.allocationSize = reqs.size; // this can be larger than slab->size typeBits &= reqs.memoryTypeBits; // this can restrict the types } VkMemoryType type; int index; if (!find_best_memtype(ma, typeBits, heap->flags, &type, &index)) goto error; PL_INFO(vk, "Allocating %zu memory of type 0x%x (id %d) in heap %d", (size_t) slab->size, (unsigned) type.propertyFlags, index, (int) type.heapIndex); minfo.memoryTypeIndex = index; VK(vkAllocateMemory(vk->dev, &minfo, VK_ALLOC, &slab->mem)); if (heap->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) VK(vkMapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data)); if (slab->buffer) VK(vkBindBufferMemory(vk->dev, slab->buffer, slab->mem, 0)); return slab; error: slab_free(vk, slab); return NULL; } static void insert_region(struct vk_slab *slab, struct vk_region region) { if (region.start == region.end) return; bool big_enough = region_len(region) >= PLVK_HEAP_MINIMUM_REGION_SIZE; // Find the index of the first region that comes after this for (int i = 0; i < slab->num_regions; i++) { struct vk_region *r = &slab->regions[i]; // Check for a few special cases which can be coalesced if (r->end == region.start) { // The new region is at the tail of this region. In addition to // modifying this region, we also need to coalesce all the following // regions for as long as possible r->end = region.end; struct vk_region *next = &slab->regions[i+1]; while (i+1 < slab->num_regions && r->end == next->start) { r->end = next->end; TARRAY_REMOVE_AT(slab->regions, slab->num_regions, i+1); } return; } if (r->start == region.end) { // The new region is at the head of this region. We don't need to // do anything special here - because if this could be further // coalesced backwards, the previous loop iteration would already // have caught it. r->start = region.start; return; } if (r->start > region.start) { // The new region comes somewhere before this region, so insert // it into this index in the array. 
if (big_enough) { TARRAY_INSERT_AT(slab, slab->regions, slab->num_regions, i, region); } return; } } // If we've reached the end of this loop, then all of the regions // come before the new region, and are disconnected - so append it if (big_enough) TARRAY_APPEND(slab, slab->regions, slab->num_regions, region); } static void heap_uninit(struct vk_ctx *vk, struct vk_heap *heap) { for (int i = 0; i < heap->num_slabs; i++) slab_free(vk, heap->slabs[i]); talloc_free(heap->slabs); *heap = (struct vk_heap){0}; } struct vk_malloc *vk_malloc_create(struct vk_ctx *vk) { struct vk_malloc *ma = talloc_zero(NULL, struct vk_malloc); vkGetPhysicalDeviceMemoryProperties(vk->physd, &ma->props); ma->vk = vk; PL_INFO(vk, "Memory heaps supported by device:"); for (int i = 0; i < ma->props.memoryHeapCount; i++) { VkMemoryHeap heap = ma->props.memoryHeaps[i]; PL_INFO(vk, " heap %d: flags 0x%x size %zu", i, (unsigned) heap.flags, (size_t) heap.size); } PL_INFO(vk, "Memory types supported by device:"); for (int i = 0; i < ma->props.memoryTypeCount; i++) { VkMemoryType type = ma->props.memoryTypes[i]; PL_INFO(vk, " type %d: flags 0x%x heap %d", i, (unsigned) type.propertyFlags, (int) type.heapIndex); } return ma; } void vk_malloc_destroy(struct vk_malloc **ma_ptr) { struct vk_malloc *ma = *ma_ptr; if (!ma) return; for (int i = 0; i < ma->num_heaps; i++) heap_uninit(ma->vk, &ma->heaps[i]); TA_FREEP(ma_ptr); } void vk_free_memslice(struct vk_malloc *ma, struct vk_memslice slice) { struct vk_ctx *vk = ma->vk; struct vk_slab *slab = slice.priv; if (!slab) return; pl_assert(slab->used >= slice.size); slab->used -= slice.size; PL_DEBUG(vk, "Freeing slice %zu + %zu from slab with size %zu", (size_t) slice.offset, (size_t) slice.size, (size_t) slab->size); if (slab->dedicated) { // If the slab was purpose-allocated for this memslice, we can just // free it here slab_free(vk, slab); } else { // Return the allocation to the free space map insert_region(slab, (struct vk_region) { .start = slice.offset, .end = slice.offset + slice.size, }); } } // reqs: can be NULL static struct vk_heap *find_heap(struct vk_malloc *ma, VkBufferUsageFlags usage, VkMemoryPropertyFlags flags, VkMemoryRequirements *reqs) { int typeBits = reqs ? reqs->memoryTypeBits : 0; for (int i = 0; i < ma->num_heaps; i++) { if (ma->heaps[i].usage != usage) continue; if (ma->heaps[i].flags != flags) continue; if (ma->heaps[i].typeBits != typeBits) continue; return &ma->heaps[i]; } // Not found => add it TARRAY_GROW(ma, ma->heaps, ma->num_heaps + 1); struct vk_heap *heap = &ma->heaps[ma->num_heaps++]; *heap = (struct vk_heap) { .usage = usage, .flags = flags, .typeBits = typeBits, }; return heap; } static inline bool region_fits(struct vk_region r, size_t size, size_t align) { return PL_ALIGN2(r.start, align) + size <= r.end; } // Finds the best-fitting region in a heap. If the heap is too small or too // fragmented, a new slab will be allocated under the hood. 
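// Allocations larger than PLVK_HEAP_MAXIMUM_SLAB_SIZE bypass the slab list and
// are served from their own slab of exactly the requested size. Otherwise, if
// no existing slab has a suitable free region, a new slab is created at
// PLVK_HEAP_SLAB_GROWTH_RATE times the previous slab size (or the requested
// size, whichever is larger), clamped to the configured minimum/maximum.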
static bool heap_get_region(struct vk_malloc *ma, struct vk_heap *heap, size_t size, size_t align, struct vk_slab **out_slab, int *out_index) { struct vk_slab *slab = NULL; // If the allocation is very big, serve it directly instead of bothering // with the heap if (size > PLVK_HEAP_MAXIMUM_SLAB_SIZE) { slab = slab_alloc(ma, heap, size); *out_slab = slab; *out_index = 0; return !!slab; } for (int i = 0; i < heap->num_slabs; i++) { slab = heap->slabs[i]; if (slab->size < size) continue; // Attempt a best fit search int best = -1; for (int n = 0; n < slab->num_regions; n++) { struct vk_region r = slab->regions[n]; if (!region_fits(r, size, align)) continue; if (best >= 0 && region_len(r) > region_len(slab->regions[best])) continue; best = n; } if (best >= 0) { *out_slab = slab; *out_index = best; return true; } } // Otherwise, allocate a new vk_slab and append it to the list. size_t cur_size = PL_MAX(size, slab ? slab->size : 0); size_t slab_size = PLVK_HEAP_SLAB_GROWTH_RATE * cur_size; slab_size = PL_MAX(PLVK_HEAP_MINIMUM_SLAB_SIZE, slab_size); slab_size = PL_MIN(PLVK_HEAP_MAXIMUM_SLAB_SIZE, slab_size); pl_assert(slab_size >= size); slab = slab_alloc(ma, heap, slab_size); if (!slab) return false; TARRAY_APPEND(NULL, heap->slabs, heap->num_slabs, slab); // Return the only region there is in a newly allocated slab pl_assert(slab->num_regions == 1); *out_slab = slab; *out_index = 0; return true; } static bool slice_heap(struct vk_malloc *ma, struct vk_heap *heap, size_t size, size_t alignment, struct vk_memslice *out) { struct vk_ctx *vk = ma->vk; struct vk_slab *slab; int index; alignment = PL_ALIGN2(alignment, vk->limits.bufferImageGranularity); if (!heap_get_region(ma, heap, size, alignment, &slab, &index)) return false; struct vk_region reg = slab->regions[index]; TARRAY_REMOVE_AT(slab->regions, slab->num_regions, index); *out = (struct vk_memslice) { .vkmem = slab->mem, .offset = PL_ALIGN2(reg.start, alignment), .size = size, .priv = slab, }; PL_DEBUG(vk, "Sub-allocating slice %zu + %zu from slab with size %zu", (size_t) out->offset, (size_t) out->size, (size_t) slab->size); size_t out_end = out->offset + out->size; insert_region(slab, (struct vk_region) { reg.start, out->offset }); insert_region(slab, (struct vk_region) { out_end, reg.end }); slab->used += size; return true; } bool vk_malloc_generic(struct vk_malloc *ma, VkMemoryRequirements reqs, VkMemoryPropertyFlags flags, struct vk_memslice *out) { struct vk_heap *heap = find_heap(ma, 0, flags, &reqs); return slice_heap(ma, heap, reqs.size, reqs.alignment, out); } bool vk_malloc_buffer(struct vk_malloc *ma, VkBufferUsageFlags bufFlags, VkMemoryPropertyFlags memFlags, VkDeviceSize size, VkDeviceSize alignment, struct vk_bufslice *out) { struct vk_heap *heap = find_heap(ma, bufFlags, memFlags, NULL); if (!slice_heap(ma, heap, size, alignment, &out->mem)) return false; struct vk_slab *slab = out->mem.priv; out->buf = slab->buffer; if (slab->data) out->data = (void *)((uintptr_t)slab->data + (ptrdiff_t)out->mem.offset); return true; } libplacebo-0.4.0/src/vulkan/malloc.h000066400000000000000000000042571324021332500173210ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
*
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include "common.h"

// All memory allocated from a vk_malloc MUST be explicitly released by
// the caller before vk_malloc_destroy is called.
struct vk_malloc *vk_malloc_create(struct vk_ctx *vk);
void vk_malloc_destroy(struct vk_malloc **ma);

// Represents a single "slice" of generic (non-buffer) memory, plus some
// metadata for accounting. This struct is essentially read-only.
struct vk_memslice {
    VkDeviceMemory vkmem;
    VkDeviceSize offset;
    VkDeviceSize size;
    void *priv;
};

void vk_free_memslice(struct vk_malloc *ma, struct vk_memslice slice);
bool vk_malloc_generic(struct vk_malloc *ma, VkMemoryRequirements reqs,
                       VkMemoryPropertyFlags flags, struct vk_memslice *out);

// Represents a single "slice" of a larger buffer
struct vk_bufslice {
    struct vk_memslice mem; // must be freed by the user when done
    VkBuffer buf;           // the buffer this memory was sliced from
    // For persistently mapped buffers, this points to the first usable byte of
    // this slice.
    void *data;
};

// Allocate a buffer slice. This is more efficient than vk_malloc_generic for
// when the user needs lots of buffers, since it doesn't require
// creating/destroying lots of (little) VkBuffers. `alignment` must be a power
// of two.
bool vk_malloc_buffer(struct vk_malloc *ma, VkBufferUsageFlags bufFlags,
                      VkMemoryPropertyFlags memFlags, VkDeviceSize size,
                      VkDeviceSize alignment, struct vk_bufslice *out);
libplacebo-0.4.0/src/vulkan/swapchain.c000066400000000000000000000430001324021332500200110ustar00rootroot00000000000000/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
*/ #include "common.h" #include "command.h" #include "formats.h" #include "utils.h" #include "gpu.h" #include "swapchain.h" struct priv { struct vk_ctx *vk; VkSurfaceKHR surf; // current swapchain and metadata: VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype VkSwapchainKHR swapchain; VkSwapchainKHR old_swapchain; int swapchain_depth; int frames_in_flight; // number of frames currently queued struct pl_color_repr color_repr; struct pl_color_space color_space; // state of the images: const struct pl_tex **images; // pl_tex wrappers for the VkImages int num_images; // size of `images` VkSemaphore *sems_in; // pool of semaphores used to synchronize images VkSemaphore *sems_out; // outgoing semaphores (rendering complete) int num_sems; // size of `sems_in` / `sems_out` int idx_sems; // index of next free semaphore pair int last_imgidx; // the image index last acquired (for submit) }; static struct pl_sw_fns vulkan_swapchain; static bool pick_surf_format(const struct pl_gpu *gpu, const struct vk_ctx *vk, VkSurfaceKHR surf, VkSurfaceFormatKHR *out_format) { VkSurfaceFormatKHR *formats = NULL; int num = 0; VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, surf, &num, NULL)); formats = talloc_array(NULL, VkSurfaceFormatKHR, num); VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, surf, &num, formats)); for (int i = 0; i < num; i++) { // A value of VK_FORMAT_UNDEFINED means we can pick anything we want if (formats[i].format == VK_FORMAT_UNDEFINED) { *out_format = (VkSurfaceFormatKHR) { .colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, .format = VK_FORMAT_R16G16B16A16_UNORM, }; talloc_free(formats); return true; } // Try and avoid exotic color spaces like HDR output for now // TODO: support these and map the correct `pl_color_space` if (formats[i].colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) continue; // Format whitelist, since we want only >= 8 bit _UNORM formats switch (formats[i].format) { case VK_FORMAT_R8G8B8_UNORM: case VK_FORMAT_B8G8R8_UNORM: case VK_FORMAT_R8G8B8A8_UNORM: case VK_FORMAT_B8G8R8A8_UNORM: case VK_FORMAT_A8B8G8R8_UNORM_PACK32: case VK_FORMAT_A2R10G10B10_UNORM_PACK32: case VK_FORMAT_A2B10G10R10_UNORM_PACK32: case VK_FORMAT_R16G16B16_UNORM: case VK_FORMAT_R16G16B16A16_UNORM: break; // accept default: continue; } // Make sure we can wrap this format to a meaningful, renderable pl_format for (int n = 0; n < gpu->num_formats; n++) { const struct pl_fmt *rafmt = gpu->formats[n]; const struct vk_format *vkfmt = rafmt->priv; if (vkfmt->ifmt != formats[i].format) continue; if (!(rafmt->caps & PL_FMT_CAP_RENDERABLE)) continue; // format valid, use it *out_format = formats[i]; talloc_free(formats); return true; } } // fall through error: PL_FATAL(vk, "Failed picking any valid, renderable surface format!"); talloc_free(formats); return false; } const struct pl_swapchain *pl_vulkan_create_swapchain(const struct pl_vulkan *plvk, const struct pl_vulkan_swapchain_params *params) { struct vk_ctx *vk = plvk->priv; const struct pl_gpu *gpu = plvk->gpu; VkSurfaceFormatKHR sfmt = params->surface_format; if (!sfmt.format && !pick_surf_format(gpu, vk, params->surface, &sfmt)) return NULL; struct pl_swapchain *sw = talloc_zero(NULL, struct pl_swapchain); sw->impl = &vulkan_swapchain; sw->ctx = vk->ctx; sw->gpu = gpu; struct priv *p = sw->priv = talloc_zero(sw, struct priv); p->vk = vk; p->surf = params->surface; p->swapchain_depth = PL_DEF(params->swapchain_depth, 3); pl_assert(p->swapchain_depth > 0); p->protoInfo = (VkSwapchainCreateInfoKHR) { .sType = 
VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .surface = p->surf, .imageFormat = sfmt.format, .imageColorSpace = sfmt.colorSpace, .imageArrayLayers = 1, // non-stereoscopic .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, .minImageCount = p->swapchain_depth + 1, // +1 for the FB .presentMode = params->present_mode, .clipped = true, }; p->color_space = pl_color_space_monitor; p->color_repr = (struct pl_color_repr) { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_PC, .alpha = PL_ALPHA_UNKNOWN, // will be updated by vk_sw_recreate }; // Make sure the swapchain present mode is supported VkPresentModeKHR *modes = NULL; int num_modes; VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, p->surf, &num_modes, NULL)); modes = talloc_array(NULL, VkPresentModeKHR, num_modes); VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, p->surf, &num_modes, modes)); bool supported = false; for (int i = 0; i < num_modes; i++) supported |= (modes[i] == p->protoInfo.presentMode); talloc_free(modes); if (!supported) { PL_WARN(vk, "Requested swap mode unsupported by this device, falling " "back to VK_PRESENT_MODE_FIFO_KHR"); p->protoInfo.presentMode = VK_PRESENT_MODE_FIFO_KHR; } return sw; error: talloc_free(modes); talloc_free(sw); return NULL; } static void vk_sw_destroy(const struct pl_swapchain *sw) { const struct pl_gpu *gpu = sw->gpu; struct priv *p = sw->priv; struct vk_ctx *vk = p->vk; pl_gpu_flush(gpu); vk_wait_idle(vk); for (int i = 0; i < p->num_images; i++) pl_tex_destroy(gpu, &p->images[i]); for (int i = 0; i < p->num_sems; i++) { vkDestroySemaphore(vk->dev, p->sems_in[i], VK_ALLOC); vkDestroySemaphore(vk->dev, p->sems_out[i], VK_ALLOC); } vkDestroySwapchainKHR(vk->dev, p->swapchain, VK_ALLOC); talloc_free((void *) sw); } static int vk_sw_latency(const struct pl_swapchain *sw) { struct priv *p = sw->priv; return p->swapchain_depth; } static bool update_swapchain_info(struct priv *p, VkSwapchainCreateInfoKHR *info) { struct vk_ctx *vk = p->vk; // Query the supported capabilities and update this struct as needed VkSurfaceCapabilitiesKHR caps; VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, p->surf, &caps)); // Sorted by preference static const struct { VkCompositeAlphaFlagsKHR vk_mode; enum pl_alpha_mode pl_mode; } alphaModes[] = { {VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, PL_ALPHA_PREMULTIPLIED}, {VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, PL_ALPHA_INDEPENDENT}, {VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, PL_ALPHA_UNKNOWN}, }; for (int i = 0; i < PL_ARRAY_SIZE(alphaModes); i++) { if (caps.supportedCompositeAlpha & alphaModes[i].vk_mode) { info->compositeAlpha = alphaModes[i].vk_mode; p->color_repr.alpha = alphaModes[i].pl_mode; break; } } if (!info->compositeAlpha) { PL_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)", caps.supportedCompositeAlpha); goto error; } // Note: We could probably also allow picking a surface transform that // flips the framebuffer and set `pl_swapchain_frame.flipped`, but this // doesn't appear to be necessary for any vulkan implementations. 
static const VkSurfaceTransformFlagsKHR rotModes[] = { VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR, VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR, }; for (int i = 0; i < PL_ARRAY_SIZE(rotModes); i++) { if (caps.supportedTransforms & rotModes[i]) { info->preTransform = rotModes[i]; break; } } if (!info->preTransform) { PL_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)", caps.supportedTransforms); goto error; } // Image count as required PL_DEBUG(vk, "Requested image count: %d (min %d max %d)", (int) info->minImageCount, (int) caps.minImageCount, (int) caps.maxImageCount); info->minImageCount = PL_MAX(info->minImageCount, caps.minImageCount); if (caps.maxImageCount) info->minImageCount = PL_MIN(info->minImageCount, caps.maxImageCount); // This seems to be an obscure case, and doesn't make sense anyway. So just // ignore it and assume we're using a sane environment where the current // window size is known. if (caps.currentExtent.width == 0xFFFFFFFF || caps.currentExtent.height == 0xFFFFFFFF) { PL_ERR(vk, "The swapchain's current extent is reported as unknown. " "In other words, we don't know the size of the window. Giving up!"); goto error; } // This seems to be an obscure case that should technically violate the spec // anyway, but better safe than sorry.. if (!caps.currentExtent.width || !caps.currentExtent.height) { PL_WARN(vk, "Unable to recreate swapchain: image extent is 0, possibly " "the window is minimized or hidden?"); goto error; } info->imageExtent = caps.currentExtent; // We just request whatever usage we can, and let the pl_vk decide what // pl_tex_params that translates to. This makes the images as flexible // as possible. info->imageUsage = caps.supportedUsageFlags; return true; error: return false; } static void destroy_swapchain(struct vk_ctx *vk, struct priv *p) { assert(p->old_swapchain); vkDestroySwapchainKHR(vk->dev, p->old_swapchain, VK_ALLOC); p->old_swapchain = NULL; } static bool vk_sw_recreate(const struct pl_swapchain *sw) { const struct pl_gpu *gpu = sw->gpu; struct priv *p = sw->priv; struct vk_ctx *vk = p->vk; VkImage *vkimages = NULL; int num_images = 0; // It's invalid to trigger another swapchain recreation while there's more // than one swapchain already active, so we need to flush any pending // asynchronous swapchain release operations that may be ongoing while (p->old_swapchain) vk_poll_commands(vk, 1000000); // 1 ms VkSwapchainCreateInfoKHR sinfo = p->protoInfo; sinfo.oldSwapchain = p->swapchain; if (!update_swapchain_info(p, &sinfo)) goto error; PL_INFO(sw, "(Re)creating swapchain of size %dx%d", sinfo.imageExtent.width, sinfo.imageExtent.height); VK(vkCreateSwapchainKHR(vk->dev, &sinfo, VK_ALLOC, &p->swapchain)); // Freeing the old swapchain while it's still in use is an error, so do it // asynchronously once the device is idle if (sinfo.oldSwapchain) { p->old_swapchain = sinfo.oldSwapchain; vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p); } // Get the new swapchain images VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num_images, NULL)); vkimages = talloc_array(NULL, VkImage, num_images); VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num_images, vkimages)); // If needed, allocate some more semaphores while (num_images > p->num_sems) { VkSemaphore sem_in, sem_out; static const VkSemaphoreCreateInfo seminfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, }; VK(vkCreateSemaphore(vk->dev, &seminfo, VK_ALLOC, &sem_in)); VK(vkCreateSemaphore(vk->dev, &seminfo, VK_ALLOC, &sem_out)); int idx = p->num_sems++; TARRAY_GROW(p, p->sems_in, 
idx); TARRAY_GROW(p, p->sems_out, idx); p->sems_in[idx] = sem_in; p->sems_out[idx] = sem_out; } // Recreate the pl_tex wrappers for (int i = 0; i < p->num_images; i++) pl_tex_destroy(gpu, &p->images[i]); p->num_images = num_images; TARRAY_GROW(p, p->images, num_images); for (int i = 0; i < num_images; i++) { p->images[i] = pl_vk_wrap_swimg(gpu, vkimages[i], sinfo); if (!p->images[i]) goto error; } pl_assert(num_images > 0); int bits = 0; // The channel with the most bits is probably the most authoritative about // the actual color information (consider e.g. a2bgr10). Slight downside // in that it results in rounding r/b for e.g. rgb565, but we don't pick // surfaces with fewer than 8 bits anyway, so let's not care for now. const struct pl_fmt *fmt = p->images[0]->params.format; for (int i = 0; i < fmt->num_components; i++) bits = PL_MAX(bits, fmt->component_depth[i]); p->color_repr.bits.sample_depth = bits; p->color_repr.bits.color_depth = bits; talloc_free(vkimages); return true; error: PL_ERR(vk, "Failed (re)creating swapchain!"); talloc_free(vkimages); vkDestroySwapchainKHR(vk->dev, p->swapchain, VK_ALLOC); p->swapchain = NULL; return false; } static bool vk_sw_start_frame(const struct pl_swapchain *sw, struct pl_swapchain_frame *out_frame) { struct priv *p = sw->priv; struct vk_ctx *vk = p->vk; if (!p->swapchain && !vk_sw_recreate(sw)) return false; VkSemaphore sem_in = p->sems_in[p->idx_sems]; PL_TRACE(vk, "vkAcquireNextImageKHR signals %p", (void *) sem_in); for (int attempts = 0; attempts < 2; attempts++) { uint32_t imgidx = 0; VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX, sem_in, NULL, &imgidx); switch (res) { case VK_SUCCESS: p->last_imgidx = imgidx; *out_frame = (struct pl_swapchain_frame) { .fbo = p->images[imgidx], .flipped = false, .color_repr = p->color_repr, .color_space = p->color_space, }; pl_tex_vk_external_dep(sw->gpu, out_frame->fbo, sem_in); return true; case VK_ERROR_OUT_OF_DATE_KHR: { // In these cases try recreating the swapchain if (!vk_sw_recreate(sw)) return false; continue; } default: PL_ERR(vk, "Failed acquiring swapchain image: %s", vk_res_str(res)); return false; } } // If we've exhausted the number of attempts to recreate the swapchain, // just give up silently and let the user retry some time later. return false; } static void present_cb(struct priv *p, void *arg) { p->frames_in_flight--; } static bool vk_sw_submit_frame(const struct pl_swapchain *sw) { const struct pl_gpu *gpu = sw->gpu; struct priv *p = sw->priv; struct vk_ctx *vk = p->vk; if (!p->swapchain) return false; struct vk_cmd *cmd = pl_vk_finish_frame(gpu, p->images[p->last_imgidx]); if (!cmd) return false; VkSemaphore sem_out = p->sems_out[p->idx_sems++]; p->idx_sems %= p->num_sems; vk_cmd_sig(cmd, sem_out); p->frames_in_flight++; vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL); vk_cmd_queue(vk, cmd); if (!vk_flush_commands(vk)) return false; // Older nvidia drivers can spontaneously combust when submitting to the // same queue as we're rendering from, in a multi-queue scenario. Safest // option is to flush the commands first and then submit to the next queue. // We can drop this hack in the future, I suppose. 
struct vk_cmdpool *pool = vk->pool_graphics; VkQueue queue = pool->queues[pool->idx_queues]; VkPresentInfoKHR pinfo = { .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .waitSemaphoreCount = 1, .pWaitSemaphores = &sem_out, .swapchainCount = 1, .pSwapchains = &p->swapchain, .pImageIndices = &p->last_imgidx, }; PL_TRACE(vk, "vkQueuePresentKHR waits on %p", (void *) sem_out); VkResult res = vkQueuePresentKHR(queue, &pinfo); switch (res) { case VK_SUCCESS: case VK_SUBOPTIMAL_KHR: return true; case VK_ERROR_OUT_OF_DATE_KHR: // We can silently ignore this error, since the next start_frame will // recreate the swapchain automatically. return true; default: PL_ERR(vk, "Failed presenting to queue %p: %s", (void *) queue, vk_res_str(res)); return false; } } static void vk_sw_swap_buffers(const struct pl_swapchain *sw) { struct priv *p = sw->priv; while (p->frames_in_flight >= p->swapchain_depth) vk_poll_commands(p->vk, 1000000); // 1 ms } static struct pl_sw_fns vulkan_swapchain = { .destroy = vk_sw_destroy, .latency = vk_sw_latency, .start_frame = vk_sw_start_frame, .submit_frame = vk_sw_submit_frame, .swap_buffers = vk_sw_swap_buffers, }; libplacebo-0.4.0/src/vulkan/utils.c000066400000000000000000000124461324021332500172040ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "utils.h" const char *vk_res_str(VkResult res) { switch (res) { #define CASE(name) case name: return #name // success codes CASE(VK_SUCCESS); CASE(VK_NOT_READY); CASE(VK_TIMEOUT); CASE(VK_EVENT_SET); CASE(VK_EVENT_RESET); CASE(VK_INCOMPLETE); // error codes CASE(VK_ERROR_OUT_OF_HOST_MEMORY); CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); CASE(VK_ERROR_INITIALIZATION_FAILED); CASE(VK_ERROR_DEVICE_LOST); CASE(VK_ERROR_MEMORY_MAP_FAILED); CASE(VK_ERROR_LAYER_NOT_PRESENT); CASE(VK_ERROR_EXTENSION_NOT_PRESENT); CASE(VK_ERROR_FEATURE_NOT_PRESENT); CASE(VK_ERROR_INCOMPATIBLE_DRIVER); CASE(VK_ERROR_TOO_MANY_OBJECTS); CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); CASE(VK_ERROR_FRAGMENTED_POOL); // Symbols introduced by extensions (explicitly guarded against so we can // make this switch exhaustive without requiring bleeding edge versions // of vulkan.h) #ifdef VK_KHR_maintenance1 CASE(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); #endif #ifdef VK_KHR_external_memory CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); #endif #ifdef VK_KHR_surface CASE(VK_ERROR_SURFACE_LOST_KHR); CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); #endif #ifdef VK_KHR_swapchain CASE(VK_SUBOPTIMAL_KHR); CASE(VK_ERROR_OUT_OF_DATE_KHR); #endif #ifdef VK_KHR_display_swapchain CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); #endif #ifdef VK_NV_glsl_shader CASE(VK_ERROR_INVALID_SHADER_NV); #endif #ifdef VK_EXT_debug_report CASE(VK_ERROR_VALIDATION_FAILED_EXT); #endif #ifdef VK_EXT_global_priority CASE(VK_ERROR_NOT_PERMITTED_EXT); #endif // Included to satisfy the switch coverage check CASE(VK_RESULT_RANGE_SIZE); CASE(VK_RESULT_MAX_ENUM); #undef CASE } return "unknown error"; } const char *vk_obj_str(VkDebugReportObjectTypeEXT obj) { switch (obj) { #define CASE(name, str) case VK_DEBUG_REPORT_OBJECT_TYPE_##name##_EXT: return #str CASE(INSTANCE, VkInstance); CASE(PHYSICAL_DEVICE, VkPhysicalDevice); CASE(DEVICE, VkDevice); CASE(QUEUE, VkQueue); CASE(SEMAPHORE, VkSemaphore); CASE(COMMAND_BUFFER, VkCommandBuffer); CASE(FENCE, VkFence); CASE(DEVICE_MEMORY, VkDeviceMemory); CASE(BUFFER, VkBuffer); CASE(IMAGE, VkImage); CASE(EVENT, VkEvent); CASE(QUERY_POOL, VkQueryPool); CASE(BUFFER_VIEW, VkBufferView); CASE(IMAGE_VIEW, VkImageView); CASE(SHADER_MODULE, VkShaderModule); CASE(PIPELINE_CACHE, VkPipelineCache); CASE(PIPELINE_LAYOUT, VkPipelineLayout); CASE(RENDER_PASS, VkRenderPass); CASE(PIPELINE, VkPipeline); CASE(DESCRIPTOR_SET_LAYOUT, VkDescriptorSetLayout); CASE(SAMPLER, VkSampler); CASE(DESCRIPTOR_POOL, VkDescriptorPool); CASE(DESCRIPTOR_SET, VkDescriptorSet); CASE(FRAMEBUFFER, VkFramebuffer); CASE(COMMAND_POOL, VkCommandPool); // Objects introduced by extensions #ifdef VK_KHR_surface CASE(SURFACE_KHR, VkSurfaceKHR); #endif #ifdef VK_KHR_swapchain CASE(SWAPCHAIN_KHR, VkSwapchainKHR); #endif #ifdef VK_KHR_display CASE(DISPLAY_KHR, VkDisplayKHR); CASE(DISPLAY_MODE_KHR, VkDisplayModeKHR); #endif #ifdef VK_KHR_descriptor_update_template CASE(DESCRIPTOR_UPDATE_TEMPLATE_KHR, VkDescriptorUpdateTemplateKHR); #endif #ifdef VK_KHR_sampler_ycbcr_conversion CASE(SAMPLER_YCBCR_CONVERSION_KHR, VkSamplerYcbcrConversionInfoKHR); #endif #ifdef VK_EXT_validation_cache CASE(VALIDATION_CACHE, VkValidationCacheEXT); #endif #ifdef VK_EXT_debug_report CASE(DEBUG_REPORT, VkDebugReportCallbackEXT); #endif #ifdef VK_NVX_device_generated_commands CASE(OBJECT_TABLE_NVX, VkObjectTableNVX); CASE(INDIRECT_COMMANDS_LAYOUT_NVX, VkIndirectCommandsLayoutNVX); #endif // Included to satisfy the switch coverage check case VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT: case 
VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT:
    case VK_DEBUG_REPORT_OBJECT_TYPE_MAX_ENUM_EXT:
        break;
#undef CASE
    }

    return "unknown object";
}
libplacebo-0.4.0/src/vulkan/utils.h000066400000000000000000000031631324021332500172050ustar00rootroot00000000000000/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include "common.h"

// Return a human-readable name for various vulkan enums
const char *vk_res_str(VkResult res);
const char *vk_obj_str(VkDebugReportObjectTypeEXT obj);

// Convenience macros to simplify a lot of common boilerplate
#define VK_ASSERT(res, str)                               \
    do {                                                  \
        if (res != VK_SUCCESS) {                          \
            PL_ERR(vk, str ": %s", vk_res_str(res));      \
            goto error;                                   \
        }                                                 \
    } while (0)

#define VK(cmd)                                           \
    do {                                                  \
        PL_TRACE(vk, #cmd);                               \
        VkResult res ## __LINE__ = (cmd);                 \
        VK_ASSERT(res ## __LINE__, #cmd);                 \
    } while (0)
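// Illustrative usage sketch (not part of the library; `do_something` is a
// hypothetical caller): both macros assume a variable named `vk` providing
// the PL_ERR/PL_TRACE log context and an `error:` label in scope at the call
// site, e.g.:
//
//     static bool do_something(struct vk_ctx *vk, VkDevice dev)
//     {
//         VkFence fence = VK_NULL_HANDLE;
//         VkFenceCreateInfo finfo = {
//             .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
//         };
//
//         VK(vkCreateFence(dev, &finfo, VK_ALLOC, &fence));
//         vkDestroyFence(dev, fence, VK_ALLOC);
//         return true;
//
//     error:
//         return false;
//     }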