==> liburing-2.6/.github/pull_request_template.md <==

----

## git request-pull output:

```
Generate your PR shortlog and diffstat with these commands:

   git remote add axboe-tree https://github.com/axboe/liburing
   git fetch axboe-tree
   git request-pull axboe-tree/master your_fork_URL your_branch_name

Then replace this with the output of `git request-pull` command.
```

----
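The pasted output will look roughly like the following. This is only an
illustration added here for orientation: the hashes, dates, names, and the
shortlog/diffstat are placeholders, not real values from the tree.

```
The following changes since commit <base-commit-hash>:

  previous commit subject line (2024-01-01 00:00:00 -0700)

are available in the Git repository at:

  https://github.com/your_fork/liburing your_branch_name

for you to fetch changes up to <tip-commit-hash>:

  your commit subject line (2024-01-02 00:00:00 -0700)

----------------------------------------------------------------
Your Name (1):
      your commit subject line

 src/queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
```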
<details>
<summary>Click to show/hide pull request guidelines</summary>

## Pull Request Guidelines

1. So that everyone can easily filter pull requests from email
   notifications, use `[GIT PULL]` as a prefix in your PR title.
   ```
   [GIT PULL] Your Pull Request Title
   ```
2. Follow the commit message format rules below.
3. Follow the Linux kernel coding style (see:
   https://github.com/torvalds/linux/blob/master/Documentation/process/coding-style.rst).

### Commit message format rules:

1. The first line is the title (no more than 72 chars, if possible).
2. Then an empty line.
3. Then a description (may be omitted for truly trivial changes).
4. Then an empty line again (if it has a description).
5. Then a `Signed-off-by` tag with your real name and email. For example:
   ```
   Signed-off-by: Foo Bar <foo.bar@example.com>
   ```

The description should be word-wrapped at 72 chars. Some things should
not be word-wrapped. They may be some kind of quoted text - long
compiler error messages, oops reports, Link, etc. (things that have a
certain specific format).

Note that all of this goes in the commit message, not in the pull
request text. The pull request text should introduce what this pull
request does, and each commit message should explain the rationale for
why that particular change was made. The git tree is the canonical
source of truth, not GitHub.

Each patch should do one thing, and one thing only. If you find
yourself writing an explanation for why a patch is fixing multiple
issues, that's a good indication that the change should be split into
separate patches.

If the commit is a fix for an issue, add a `Fixes` tag with the issue
URL.

Don't use a GitHub anonymous email like this as the commit author:

```
123456789+username@users.noreply.github.com
```

Use a real email address!

### Commit message example:

```
src/queue: don't flush SQ ring for new wait interface

If we have IORING_FEAT_EXT_ARG, then timeouts are done through the
syscall instead of by posting an internal timeout. This was done to
be both more efficient, but also to enable multi-threaded use of the
wait side. If we touch the SQ state by flushing it, that isn't safe
without synchronization.

Fixes: https://github.com/axboe/liburing/issues/402
Signed-off-by: Jens Axboe <axboe@kernel.dk>
```

</details>
----

## By submitting this pull request, I acknowledge that:

1. I have followed the above pull request guidelines.
2. I have the rights to submit this work under the same license.
3. I agree to a Developer Certificate of Origin (see https://developercertificate.org for more information).

==> liburing-2.6/.github/workflows/build.yml <==

name: Build test

on:
  # Trigger the workflow on push or pull requests.
  push:
  pull_request:

jobs:
  build:
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        include:
          # x86-64 gcc
          - arch: x86_64
            cc_pkg: gcc-x86-64-linux-gnu
            cxx_pkg: g++-x86-64-linux-gnu
            cc: x86_64-linux-gnu-gcc
            cxx: x86_64-linux-gnu-g++

          # x86-64 clang
          - arch: x86_64
            cc_pkg: clang
            cxx_pkg: clang
            cc: clang
            cxx: clang++
            liburing_extra_flags: -Wshorten-64-to-32
            extra_flags: -Wmissing-prototypes -Wstrict-prototypes -Wunreachable-code-loop-increment -Wunreachable-code -Wmissing-variable-declarations -Wextra-semi-stmt

          # x86 (32-bit) gcc
          - arch: i686
            cc_pkg: gcc-i686-linux-gnu
            cxx_pkg: g++-i686-linux-gnu
            cc: i686-linux-gnu-gcc
            cxx: i686-linux-gnu-g++

          # aarch64 gcc
          - arch: aarch64
            cc_pkg: gcc-aarch64-linux-gnu
            cxx_pkg: g++-aarch64-linux-gnu
            cc: aarch64-linux-gnu-gcc
            cxx: aarch64-linux-gnu-g++

          # arm (32-bit) gcc
          - arch: arm
            cc_pkg: gcc-arm-linux-gnueabi
            cxx_pkg: g++-arm-linux-gnueabi
            cc: arm-linux-gnueabi-gcc
            cxx: arm-linux-gnueabi-g++

          # riscv64
          - arch: riscv64
            cc_pkg: gcc-riscv64-linux-gnu
            cxx_pkg: g++-riscv64-linux-gnu
            cc: riscv64-linux-gnu-gcc
            cxx: riscv64-linux-gnu-g++

          # powerpc64
          - arch: powerpc64
            cc_pkg: gcc-powerpc64-linux-gnu
            cxx_pkg: g++-powerpc64-linux-gnu
            cc: powerpc64-linux-gnu-gcc
            cxx: powerpc64-linux-gnu-g++

          # powerpc
          - arch: powerpc
            cc_pkg: gcc-powerpc-linux-gnu
            cxx_pkg: g++-powerpc-linux-gnu
            cc: powerpc-linux-gnu-gcc
            cxx: powerpc-linux-gnu-g++

          # alpha
          - arch: alpha
            cc_pkg: gcc-alpha-linux-gnu
            cxx_pkg: g++-alpha-linux-gnu
            cc: alpha-linux-gnu-gcc
            cxx: alpha-linux-gnu-g++

          # mips64
          - arch: mips64
            cc_pkg: gcc-mips64-linux-gnuabi64
            cxx_pkg: g++-mips64-linux-gnuabi64
            cc: mips64-linux-gnuabi64-gcc
            cxx: mips64-linux-gnuabi64-g++

          # mips
          - arch: mips
            cc_pkg: gcc-mips-linux-gnu
            cxx_pkg: g++-mips-linux-gnu
            cc: mips-linux-gnu-gcc
            cxx: mips-linux-gnu-g++

          # hppa
          - arch: hppa
            cc_pkg: gcc-hppa-linux-gnu
            cxx_pkg: g++-hppa-linux-gnu
            cc: hppa-linux-gnu-gcc
            cxx: hppa-linux-gnu-g++

    env:
      FLAGS: -g -O3 -Wall -Wextra -Werror -Wno-sign-compare ${{matrix.extra_flags}}

      # Flags for building sources in src/ dir only.
      LIBURING_CFLAGS: ${{matrix.liburing_extra_flags}}

    steps:
      - name: Checkout source
        uses: actions/checkout@v3

      - name: Install Compilers
        run: |
          if [[ "${{matrix.cc_pkg}}" == "clang" ]]; then \
            wget https://apt.llvm.org/llvm.sh -O /tmp/llvm.sh; \
            sudo apt-get purge --auto-remove llvm python3-lldb-14 llvm-14 -y; \
            sudo bash /tmp/llvm.sh 17; \
            sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-17 400; \
            sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-17 400; \
          else \
            sudo apt-get update -y; \
            sudo apt-get install -y ${{matrix.cc_pkg}} ${{matrix.cxx_pkg}}; \
          fi;

      - name: Display compiler versions
        run: |
          ${{matrix.cc}} --version;
          ${{matrix.cxx}} --version;

      - name: Build
        run: |
          ./configure --cc=${{matrix.cc}} --cxx=${{matrix.cxx}};
          make -j$(nproc) V=1 CPPFLAGS="-Werror" CFLAGS="$FLAGS" CXXFLAGS="$FLAGS";

      - name: Test install command
        run: |
          sudo make install;
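To debug a failure from the matrix above, one cell can be reproduced by
hand. A minimal sketch, assuming the aarch64 entry, an Ubuntu host, and
the cross packages named in that matrix entry, run from the repository
root:

```
# Install the cross toolchain used by the aarch64 matrix entry.
sudo apt-get install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu

# Configure and build the same way the workflow's Build step does.
./configure --cc=aarch64-linux-gnu-gcc --cxx=aarch64-linux-gnu-g++
make -j$(nproc) V=1 CPPFLAGS="-Werror" CFLAGS="-g -O3 -Wall -Wextra -Werror"
```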
==> liburing-2.6/.github/workflows/shellcheck.yml <==

name: Shellcheck

on:
  # Trigger the workflow on push or pull requests.
  push:
  pull_request:

jobs:
  test:
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout source
        uses: actions/checkout@v3

      - name: Display shellcheck version
        run: shellcheck --version

      - name: Shellcheck execution
        run: shellcheck test/runtest*.sh

==> liburing-2.6/.gitignore <==

*.rej
*.orig
*~
/*.patch

*.d
*.o
*.o[ls]

/src/liburing.a
/src/liburing.so*
/src/liburing-ffi.a
/src/liburing-ffi.so*
/src/include/liburing/compat.h
/src/include/liburing/io_uring_version.h

/examples/io_uring-close-test
/examples/io_uring-cp
/examples/io_uring-test
/examples/io_uring-udp
/examples/link-cp
/examples/napi-busy-poll-client
/examples/napi-busy-poll-server
/examples/ucontext-cp
/examples/poll-bench
/examples/proxy
/examples/send-zerocopy
/examples/rsrc-update-bench

/test/*.t
/test/*.dmesg
/test/output/

config-host.h
config-host.mak
config.log

liburing.pc
liburing-ffi.pc

cscope.out

==> liburing-2.6/CHANGELOG <==

liburing-2.6 release

- Add getsockopt and setsockopt socket commands
- Add test cases to test/hardlink
- Man page fixes
- Add futex support, and test cases
- Add waitid support, and test cases
- Add read multishot, and test cases
- Add support for IORING_SETUP_NO_SQARRAY
- Use IORING_SETUP_NO_SQARRAY as the default
- Add support for IORING_OP_FIXED_FD_INSTALL
- Add io_uring_prep_fixed_fd_install() helper
- Support for napi busy polling
- Improve/add test cases
- Man page fixes
- Add sample 'proxy' example

liburing-2.5 release

- Add support for io_uring_prep_cmd_sock()
- Add support for application allocated ring memory, for placing rings
  in huge mem. Available through io_uring_queue_init_mem().
- Add support for registered ring fds
- Various documentation updates
- Various fixes

liburing-2.4 release

- Add io_uring_{major,minor,check}_version() functions.
- Add IO_URING_{MAJOR,MINOR,CHECK}_VERSION() macros.
- FFI support (for non-C/C++ languages integration).
- Add io_uring_prep_msg_ring_cqe_flags() function.
- Deprecate --nolibc configure option.
- CONFIG_NOLIBC is always enabled on x86-64, x86, and aarch64.
- Add support for IORING_REGISTER_USE_REGISTERED_RING and use if available.
- Add io_uring_close_ring_fd() function.
- Add io_uring_prep_msg_ring_fd_alloc function.
- Add io_uring_free_buf_ring() and io_uring_setup_buf_ring() functions.
- Ensure that io_uring_prep_accept_direct(), io_uring_prep_openat_direct(),
  io_uring_prep_openat2_direct(), io_uring_prep_msg_ring_fd(), and
  io_uring_prep_socket_direct() factor in being called with
  IORING_FILE_INDEX_ALLOC for allocating a direct descriptor.
- Add io_uring_prep_sendto() function.
- Add io_uring_prep_cmd_sock() function.

liburing-2.3 release

- Support non-libc build for aarch64.
- Add io_uring_{enter,enter2,register,setup} syscall functions.
- Add sync cancel interface, io_uring_register_sync_cancel().
- Fix return value of io_uring_submit_and_wait_timeout() to match the
  man page.
- Improvements to the regression tests
- Add support and test case for passthrough IO
- Add recv and recvmsg multishot helpers and support
- Add documentation and support for IORING_SETUP_DEFER_TASKRUN
- Fix potential missing kernel entry with IORING_SETUP_IOPOLL
- Add support and documentation for zero-copy network transmit
- Various optimizations
- Many cleanups
- Many man page additions and updates

liburing-2.2 release

- Support non-libc builds.
- Optimized syscall handling for x86-64/x86/aarch64.
- Enable non-lib function calls for fast path functions.
- Add support for multishot accept.
- io_uring_register_files() will set RLIMIT_NOFILE if necessary.
- Add support for registered ring fds, io_uring_register_ring_fd(),
  reducing the overhead of an io_uring_enter() system call.
- Add support for the message ring opcode.
- Add support for newer request cancelation features.
- Add support for IORING_SETUP_COOP_TASKRUN, which can help reduce the
  overhead of io_uring in general. Most applications should set this
  flag, see the io_uring_setup.2 man page for details.
- Add support for registering a sparse buffer and file set.
- Add support for a new buffer provide scheme, see
  io_uring_register_buf_ring.3 for details.
- Add io_uring_submit_and_wait_timeout() for submitting IO and waiting
  for completions with a timeout.
- Add io_uring_prep_{read,write}v2 prep helpers.
- Add io_uring_prep_close_direct() helper.
- Add support for SQE128 and CQE32, which are doubly sized SQE and CQE
  rings. This is needed for some cases of the new IORING_OP_URING_CMD,
  notably for NVMe passthrough.
- ~5500 lines of man page additions, including adding ~90 new man pages.
- Synced with the 5.19 kernel release, supporting all the features of
  5.19 and earlier.
- 24 new regression test cases, and ~7000 lines of new tests in general.
- General optimizations and fixes.
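As a pointer for readers of the notes above: the registered ring fd
support (added in 2.2, extended in 2.5) is used roughly as in the
following sketch. This is an illustration added for this document, not
part of the changelog itself; error handling is trimmed, and the no-op
request stands in for real work.

    /* Sketch: register the ring fd to reduce io_uring_enter() overhead,
     * then submit one no-op request and reap its completion. */
    #include <liburing.h>

    int run_once(void)
    {
            struct io_uring ring;
            struct io_uring_sqe *sqe;
            struct io_uring_cqe *cqe;
            int ret;

            ret = io_uring_queue_init(8, &ring, 0);
            if (ret < 0)
                    return ret;

            /* Subsequent submit/wait calls use the registered fd internally. */
            ret = io_uring_register_ring_fd(&ring);
            if (ret < 0)
                    goto out;

            sqe = io_uring_get_sqe(&ring);
            io_uring_prep_nop(sqe);
            io_uring_submit(&ring);

            ret = io_uring_wait_cqe(&ring, &cqe);
            if (!ret)
                    io_uring_cqe_seen(&ring, cqe);
    out:
            io_uring_queue_exit(&ring);
            return ret;
    }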
==> liburing-2.6/CITATION.cff <==

cff-version: 1.2.0
preferred-citation:
  type: software
  authors:
    - family-names: "Axboe"
      given-names: "Jens"
      email: axboe@kernel.dk
  title: "liburing library for io_uring"
  year: 2022
  url: "https://github.com/axboe/liburing"
license: MIT

==> liburing-2.6/COPYING <==

                  GNU LESSER GENERAL PUBLIC LICENSE
                       Version 2.1, February 1999

 Copyright (C) 1991, 1999 Free Software Foundation, Inc.
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

[This is the first released version of the Lesser GPL.  It also counts
 as the successor of the GNU Library Public License, version 2, hence
 the version number 2.1.]

                            Preamble

The licenses for most software are designed to take away your freedom
to share and change it.
By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. 
We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). 
Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) 
Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. 
Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. 
However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. 
Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA

Also add information on how to contact you by electronic and paper mail.

You should also get your employer (if you work as a programmer) or
your school, if any, to sign a "copyright disclaimer" for the library,
if necessary. Here is a sample; alter the names:

  Yoyodyne, Inc., hereby disclaims all copyright interest in the
  library `Frob' (a library for tweaking knobs) written by
  James Random Hacker.

  <signature of Ty Coon>, 1 April 1990
  Ty Coon, President of Vice

That's all there is to it!

==> liburing-2.6/COPYING.GPL <==

                    GNU GENERAL PUBLIC LICENSE
                       Version 2, June 1991

 Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

The licenses for most software are designed to take away your freedom
to share and change it. By contrast, the GNU General Public License is
intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.

When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.

To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if
you distribute copies of the software, or if you modify it.

For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.

We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.

Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on,
we want its recipients to know that what they have is not the
original, so that any problems introduced by others will not reflect
on the original authors' reputations.

Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at
all.
The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. 
Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. 
If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the
appropriate parts of the General Public License. Of course, the
commands you use may be called something other than `show w' and
`show c'; they could even be mouse-clicks or menu items--whatever
suits your program.

You should also get your employer (if you work as a programmer) or
your school, if any, to sign a "copyright disclaimer" for the program,
if necessary. Here is a sample; alter the names:

  Yoyodyne, Inc., hereby disclaims all copyright interest in the
  program `Gnomovision' (which makes passes at compilers) written by
  James Hacker.

  <signature of Ty Coon>, 1 April 1989
  Ty Coon, President of Vice

This General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library,
you may consider it more useful to permit linking proprietary
applications with the library. If this is what you want to do, use the
GNU Lesser General Public License instead of this License.

==> liburing-2.6/LICENSE <==

Copyright 2020 Jens Axboe

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

==> liburing-2.6/Makefile <==

include Makefile.common

RPMBUILD=$(shell `which rpmbuild >&/dev/null` && echo "rpmbuild" || echo "rpm")
INSTALL=install

default: all

all:
	@$(MAKE) -C src
	@$(MAKE) -C test
	@$(MAKE) -C examples

.PHONY: all install default clean test
.PHONY: FORCE cscope

runtests: all
	@$(MAKE) -C test runtests
runtests-loop: all
	@$(MAKE) -C test runtests-loop
runtests-parallel: all
	@$(MAKE) -C test runtests-parallel

config-host.mak: configure
	+@if [ ! -e "$@" ]; then				\
	  echo "Running configure ...";				\
	  ./configure;						\
	else							\
	  echo "$@ is out-of-date, running configure";		\
	  sed -n "/.*Configured with/s/[^:]*: //p" "$@" | sh;	\
	fi

ifneq ($(MAKECMDGOALS),clean)
include config-host.mak
endif

%.pc: %.pc.in config-host.mak $(SPECFILE)
	sed -e "s%@prefix@%$(prefix)%g" \
	    -e "s%@libdir@%$(libdir)%g" \
	    -e "s%@includedir@%$(includedir)%g" \
	    -e "s%@NAME@%$(NAME)%g" \
	    -e "s%@VERSION@%$(VERSION)%g" \
	    $< >$@

install: $(NAME).pc $(NAME)-ffi.pc
	@$(MAKE) -C src install prefix=$(DESTDIR)$(prefix) \
		includedir=$(DESTDIR)$(includedir) \
		libdir=$(DESTDIR)$(libdir) \
		libdevdir=$(DESTDIR)$(libdevdir) \
		relativelibdir=$(relativelibdir)
	$(INSTALL) -D -m 644 $(NAME).pc $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME).pc
	$(INSTALL) -D -m 644 $(NAME)-ffi.pc $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME)-ffi.pc
	$(INSTALL) -m 755 -d $(DESTDIR)$(mandir)/man2
	$(INSTALL) -m 644 man/*.2 $(DESTDIR)$(mandir)/man2
	$(INSTALL) -m 755 -d $(DESTDIR)$(mandir)/man3
	$(INSTALL) -m 644 man/*.3 $(DESTDIR)$(mandir)/man3
	$(INSTALL) -m 755 -d $(DESTDIR)$(mandir)/man7
	$(INSTALL) -m 644 man/*.7 $(DESTDIR)$(mandir)/man7

uninstall:
	@$(MAKE) -C src uninstall prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir)
	@rm -f $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME).pc
	@rm -f $(DESTDIR)$(libdevdir)/pkgconfig/$(NAME)-ffi.pc
	@rm -rf $(DESTDIR)$(mandir)/man2/io_uring*.2
	@rm -rf $(DESTDIR)$(mandir)/man3/io_uring*.3
	@rm -rf $(DESTDIR)$(mandir)/man7/io_uring*.7

install-tests:
	@$(MAKE) -C test install prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir)

uninstall-tests:
	@$(MAKE) -C test uninstall prefix=$(DESTDIR)$(prefix) datadir=$(DESTDIR)$(datadir)

clean:
	@rm -f config-host.mak config-host.h cscope.out $(NAME).pc $(NAME)-ffi.pc test/*.dmesg
	@$(MAKE) -C src clean
	@$(MAKE) -C test clean
	@$(MAKE) -C examples clean

cscope:
	@cscope -b -R

tag-archive:
	@git tag $(TAG)

create-archive:
	@git archive --prefix=$(NAME)-$(VERSION)/ -o $(NAME)-$(VERSION).tar.gz $(TAG)
	@echo "The final archive is ./$(NAME)-$(VERSION).tar.gz."

archive: clean tag-archive create-archive

srpm: create-archive
	$(RPMBUILD) --define "_sourcedir `pwd`" --define "_srcrpmdir `pwd`" --nodeps -bs $(SPECFILE)

==> liburing-2.6/Makefile.common <==

TOP := $(dir $(CURDIR)/$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)))
NAME=liburing
SPECFILE=$(TOP)/$(NAME).spec
VERSION=$(shell awk '/Version:/ { print $$2 }' $(SPECFILE))
VERSION_MAJOR=$(shell echo $(VERSION) | cut -d. -f1)
VERSION_MINOR=$(shell echo $(VERSION) | cut -d. -f2)
TAG = $(NAME)-$(VERSION)

==> liburing-2.6/Makefile.quiet <==

ifneq ($(findstring $(MAKEFLAGS),s),s)
ifndef V
QUIET_CC     = @echo '  ' CC $@;
QUIET_CXX    = @echo '  ' CXX $@;
QUIET_LINK   = @echo '  ' LINK $@;
QUIET_AR     = @echo '  ' AR $@;
QUIET_RANLIB = @echo '' RANLIB $@;
endif
endif

==> liburing-2.6/README <==

liburing
--------

This is the io_uring library, liburing. liburing provides helpers to
set up and tear down io_uring instances, and also a simplified
interface for applications that don't need (or want) to deal with the
full kernel side implementation.

For more info on io_uring, please see:

https://kernel.dk/io_uring.pdf

Subscribe to io-uring@vger.kernel.org for io_uring related discussions
and development for both kernel and userspace. The list is archived here:

https://lore.kernel.org/io-uring/
The list is archived here: https://lore.kernel.org/io-uring/ kernel version dependency -------------------------- liburing itself is not tied to any specific kernel release, and hence it's possible to use the newest liburing release even on older kernels (and vice versa). Newer features may only be available on more recent kernels, obviously. ulimit settings --------------- io_uring accounts the memory it needs under the rlimit memlocked option, which can be quite low on some setups (64K). The default is usually enough for most use cases, but bigger rings or things like registered buffers deplete it quickly. root isn't under this restriction, but regular users are. Going into detail on how to bump the limit on various systems is beyond the scope of this little blurb, but check /etc/security/limits.conf for user specific settings, or /etc/systemd/user.conf and /etc/systemd/system.conf for systemd setups. This affects kernels 5.11 and earlier; newer kernels are less dependent on RLIMIT_MEMLOCK, as it is only used for registering buffers. Regression tests ---------------- The bulk of liburing is actually regression/unit tests for both liburing and the kernel io_uring support. Please note that this suite isn't expected to pass on older kernels, and may even crash or hang older kernels! Building liburing ----------------- # # Prepare build config (optional). # # --cc specifies the C compiler. # --cxx specifies the C++ compiler. # ./configure --cc=gcc --cxx=g++; # # Build liburing. # make -j$(nproc); # # Install liburing (headers, shared/static libs, and manpage). # sudo make install; See './configure --help' for more information about build config options. FFI support ----------- By default, the build results in 4 lib files: 2 shared libs: liburing.so liburing-ffi.so 2 static libs: liburing.a liburing-ffi.a Languages and applications that can't use 'static inline' functions in liburing.h should use the FFI variants. liburing's main public interface lives in liburing.h as 'static inline' functions. Users wishing to consume liburing purely as a binary dependency should link against liburing-ffi. It contains definitions for every 'static inline' function. License ------- All software contained within this repo is dual licensed LGPL and MIT, see COPYING and LICENSE, except for a header coming from the kernel which is dual licensed GPL with a Linux-syscall-note exception and MIT, see COPYING.GPL and src/include/liburing/io_uring.h. Jens Axboe 2022-05-19 liburing-2.6/SECURITY.md000066400000000000000000000003161461424365000147720ustar00rootroot00000000000000# Security Policy ## Reporting a Vulnerability Please report any security issue to axboe@kernel.dk where the issue will be triaged appropriately. Thank you in advance for helping to keep liburing secure.
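Returning to the build and API notes in the README above: the following is a minimal, self-contained sketch of the setup/submit/wait/teardown flow that the liburing helpers provide. It is not part of the liburing tree; the file name nop-demo.c is made up for illustration, and it uses only public liburing calls, so it can be built like the shipped examples, e.g. gcc -Wall -O2 -o nop-demo nop-demo.c -luring.

/* nop-demo.c: illustrative sketch of the basic liburing round trip
 * (not shipped with liburing) */
#include <stdio.h>
#include <string.h>
#include "liburing.h"

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	/* set up a ring with room for 8 submissions */
	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	/* grab an SQE, prepare a no-op request, and submit it */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	io_uring_sqe_set_data64(sqe, 0xcafe);
	io_uring_submit(&ring);

	/* wait for the completion, then mark it as consumed */
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0) {
		fprintf(stderr, "wait_cqe: %s\n", strerror(-ret));
		return 1;
	}
	printf("nop done: res=%d data=0x%llx\n", cqe->res,
	       (unsigned long long) io_uring_cqe_get_data64(cqe));
	io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	return 0;
}

The same program should also link against the FFI variant (-luring-ffi) mentioned in the README, since liburing-ffi exports out-of-line definitions of the 'static inline' helpers.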
liburing-2.6/configure000077500000000000000000000337661461424365000151270ustar00rootroot00000000000000#!/bin/sh set -e cc=${CC:-gcc} cxx=${CXX:-g++} for opt do optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)' || true) case "$opt" in --help|-h) show_help=yes ;; --prefix=*) prefix="$optarg" ;; --includedir=*) includedir="$optarg" ;; --libdir=*) libdir="$optarg" ;; --libdevdir=*) libdevdir="$optarg" ;; --mandir=*) mandir="$optarg" ;; --datadir=*) datadir="$optarg" ;; --cc=*) cc="$optarg" ;; --cxx=*) cxx="$optarg" ;; --use-libc) use_libc=yes ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" exit 1 ;; esac done if test -z "$prefix"; then prefix=/usr fi if test -z "$includedir"; then includedir="$prefix/include" fi if test -z "$libdir"; then libdir="$prefix/lib" fi if test -z "$libdevdir"; then libdevdir="$prefix/lib" fi if test -z "$mandir"; then mandir="$prefix/man" fi if test -z "$datadir"; then datadir="$prefix/share" fi if test x"$libdir" = x"$libdevdir"; then relativelibdir="" else relativelibdir="$libdir/" fi if test "$show_help" = "yes"; then cat < trap "rm -rf $TMP_DIRECTORY" EXIT INT QUIT TERM rm -rf config.log config_host_mak="config-host.mak" config_host_h="config-host.h" rm -rf $config_host_mak rm -rf $config_host_h fatal() { echo $@ echo "Configure failed, check config.log and/or the above output" rm -rf $config_host_mak rm -rf $config_host_h exit 1 } # Print result for each configuration test print_config() { printf "%-30s%s\n" "$1" "$2" } # Default CFLAGS CFLAGS="-D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -include config-host.h" BUILD_CFLAGS="" # Print configure header at the top of $config_host_h echo "/*" > $config_host_h echo " * Automatically generated by configure - do not modify" >> $config_host_h printf " * Configured with:" >> $config_host_h printf " * '%s'" "$0" "$@" >> $config_host_h echo "" >> $config_host_h echo " */" >> $config_host_h echo "# Automatically generated by configure - do not modify" > $config_host_mak printf "# Configured with:" >> $config_host_mak printf " '%s'" "$0" "$@" >> $config_host_mak echo >> $config_host_mak do_cxx() { # Run the compiler, capturing its output to the log. echo $cxx "$@" >> config.log $cxx "$@" >> config.log 2>&1 || return $? return 0 } do_cc() { # Run the compiler, capturing its output to the log. echo $cc "$@" >> config.log $cc "$@" >> config.log 2>&1 || return $? # Test passed. If this is an --enable-werror build, rerun # the test with -Werror and bail out if it fails. This # makes warning-generating-errors in configure test code # obvious to developers. if test "$werror" != "yes"; then return 0 fi # Don't bother rerunning the compile if we were already using -Werror case "$*" in *-Werror*) return 0 ;; esac echo $cc -Werror "$@" >> config.log $cc -Werror "$@" >> config.log 2>&1 && return $? echo "ERROR: configure test passed without -Werror but failed with -Werror." echo "This is probably a bug in the configure script. The failing command" echo "will be at the bottom of config.log." fatal "You can run configure with --disable-werror to bypass this check." 
} compile_prog() { local_cflags="$1" local_ldflags="$2 $LIBS" echo "Compiling test case $3" >> config.log do_cc $CFLAGS $local_cflags -o $TMPE $TMPC $LDFLAGS $local_ldflags } compile_prog_cxx() { local_cflags="$1" local_ldflags="$2 $LIBS" echo "Compiling test case $3" >> config.log do_cxx $CFLAGS $local_cflags -o $TMPE $TMPCXX $LDFLAGS $local_ldflags } has() { type "$1" >/dev/null 2>&1 } output_mak() { echo "$1=$2" >> $config_host_mak } output_sym() { output_mak "$1" "y" echo "#define $1" >> $config_host_h } print_and_output_mak() { print_config "$1" "$2" output_mak "$1" "$2" } print_and_output_mak "prefix" "$prefix" print_and_output_mak "includedir" "$includedir" print_and_output_mak "libdir" "$libdir" print_and_output_mak "libdevdir" "$libdevdir" print_and_output_mak "relativelibdir" "$relativelibdir" print_and_output_mak "mandir" "$mandir" print_and_output_mak "datadir" "$datadir" #################################################### # Check for correct compiler runtime library to link with libgcc_link_flag="-lgcc" if $cc -print-libgcc-file-name >/dev/null 2>&1; then libgcc_link_flag="$($cc $CFLAGS $LDFLAGS -print-libgcc-file-name)" fi print_and_output_mak "libgcc_link_flag" "$libgcc_link_flag" #################################################### ########################################## # check for compiler -Wstringop-overflow stringop_overflow="no" cat > $TMPC << EOF #include int main(int argc, char **argv) { return 0; } EOF if compile_prog "-Werror -Wstringop-overflow=0" "" "stringop_overflow"; then stringop_overflow="yes" fi print_config "stringop_overflow" "$stringop_overflow" ########################################## # check for compiler -Warray-bounds array_bounds="no" cat > $TMPC << EOF #include int main(int argc, char **argv) { return 0; } EOF if compile_prog "-Werror -Warray-bounds=0" "" "array_bounds"; then array_bounds="yes" fi print_config "array_bounds" "$array_bounds" ########################################## # check for __kernel_rwf_t __kernel_rwf_t="no" cat > $TMPC << EOF #include int main(int argc, char **argv) { __kernel_rwf_t x; x = 0; return x; } EOF if compile_prog "" "" "__kernel_rwf_t"; then __kernel_rwf_t="yes" fi print_config "__kernel_rwf_t" "$__kernel_rwf_t" ########################################## # check for __kernel_timespec __kernel_timespec="no" cat > $TMPC << EOF #include #include int main(int argc, char **argv) { struct __kernel_timespec ts; ts.tv_sec = 0; ts.tv_nsec = 1; return 0; } EOF if compile_prog "" "" "__kernel_timespec"; then __kernel_timespec="yes" fi print_config "__kernel_timespec" "$__kernel_timespec" ########################################## # check for open_how open_how="no" cat > $TMPC << EOF #include #include #include #include int main(int argc, char **argv) { struct open_how how; how.flags = 0; how.mode = 0; how.resolve = 0; return 0; } EOF if compile_prog "" "" "open_how"; then open_how="yes" fi print_config "open_how" "$open_how" ########################################## # check for statx statx="no" cat > $TMPC << EOF #include #include #include #include #include int main(int argc, char **argv) { struct statx x; return memset(&x, 0, sizeof(x)) != NULL; } EOF if compile_prog "" "" "statx"; then statx="yes" fi print_config "statx" "$statx" ########################################## # check for glibc statx glibc_statx="no" cat > $TMPC << EOF #include #include #include #include #include int main(int argc, char **argv) { struct statx x; return memset(&x, 0, sizeof(x)) != NULL; } EOF if compile_prog "" "" "glibc_statx"; then
glibc_statx="yes" fi print_config "glibc_statx" "$glibc_statx" ########################################## # check for C++ has_cxx="no" cat > $TMPCXX << EOF #include int main(int argc, char **argv) { std::cout << "Test"; return 0; } EOF if compile_prog_cxx "" "" "C++"; then has_cxx="yes" fi print_config "C++" "$has_cxx" ########################################## # check for ucontext support has_ucontext="no" cat > $TMPC << EOF #include int main(int argc, char **argv) { ucontext_t ctx; getcontext(&ctx); makecontext(&ctx, 0, 0); return 0; } EOF if compile_prog "" "" "has_ucontext"; then has_ucontext="yes" fi print_config "has_ucontext" "$has_ucontext" ########################################## # Check NVME_URING_CMD support nvme_uring_cmd="no" cat > $TMPC << EOF #include int main(void) { struct nvme_uring_cmd *cmd; return sizeof(struct nvme_uring_cmd); } EOF if compile_prog "" "" "nvme uring cmd"; then nvme_uring_cmd="yes" fi print_config "NVMe uring command support" "$nvme_uring_cmd" ########################################## # Check futexv support futexv="no" cat > $TMPC << EOF #include #include #include int main(void) { struct futex_waitv fw; memset(&fw, FUTEX_32, sizeof(fw)); return sizeof(struct futex_waitv); } EOF if compile_prog "" "" "futexv"; then futexv="yes" fi print_config "futex waitv support" "$futexv" ########################################## # Check idtype_t support has_idtype_t="no" cat > $TMPC << EOF #include int main(void) { idtype_t v; return 0; } EOF if compile_prog "" "" "idtype_t"; then has_idtype_t="yes" fi print_config "has_idtype_t" "$has_idtype_t" ############################################################################# liburing_nolibc="no" if test "$use_libc" != "yes"; then # # Currently, CONFIG_NOLIBC only supports x86-64, x86 (32-bit), aarch64 and riscv64. 
# cat > $TMPC << EOF int main(void){ #if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) return 0; #else #error libc is needed #endif } EOF if compile_prog "" "" "nolibc"; then liburing_nolibc="yes" fi fi print_config "nolibc" "$liburing_nolibc"; ############################################################################# #################################################### # Most Android devices don't have sys/fanotify.h has_fanotify="no" cat > $TMPC << EOF #include int main(void) { return 0; } EOF if compile_prog "" "" "fanotify"; then has_fanotify="yes" fi print_config "has_fanotify" "$has_fanotify" #################################################### if test "$liburing_nolibc" = "yes"; then output_sym "CONFIG_NOLIBC" fi if test "$__kernel_rwf_t" = "yes"; then output_sym "CONFIG_HAVE_KERNEL_RWF_T" fi if test "$__kernel_timespec" = "yes"; then output_sym "CONFIG_HAVE_KERNEL_TIMESPEC" fi if test "$open_how" = "yes"; then output_sym "CONFIG_HAVE_OPEN_HOW" fi if test "$statx" = "yes"; then output_sym "CONFIG_HAVE_STATX" fi if test "$glibc_statx" = "yes"; then output_sym "CONFIG_HAVE_GLIBC_STATX" fi if test "$has_cxx" = "yes"; then output_sym "CONFIG_HAVE_CXX" fi if test "$has_ucontext" = "yes"; then output_sym "CONFIG_HAVE_UCONTEXT" fi if test "$stringop_overflow" = "yes"; then output_sym "CONFIG_HAVE_STRINGOP_OVERFLOW" fi if test "$array_bounds" = "yes"; then output_sym "CONFIG_HAVE_ARRAY_BOUNDS" fi if test "$nvme_uring_cmd" = "yes"; then output_sym "CONFIG_HAVE_NVME_URING" fi if test "$has_fanotify" = "yes"; then output_sym "CONFIG_HAVE_FANOTIFY" fi if test "$futexv" = "yes"; then output_sym "CONFIG_HAVE_FUTEXV" fi echo "CC=$cc" >> $config_host_mak print_config "CC" "$cc" echo "CXX=$cxx" >> $config_host_mak print_config "CXX" "$cxx" # generate io_uring_version.h # Reset MAKEFLAGS MAKEFLAGS= MAKE_PRINT_VARS="include Makefile.common\nprint-%%: ; @echo \$(\$*)\n" VERSION_MAJOR=$(printf "$MAKE_PRINT_VARS" | make -s --no-print-directory -f - print-VERSION_MAJOR) VERSION_MINOR=$(printf "$MAKE_PRINT_VARS" | make -s --no-print-directory -f - print-VERSION_MINOR) io_uring_version_h="src/include/liburing/io_uring_version.h" cat > $io_uring_version_h << EOF /* SPDX-License-Identifier: MIT */ #ifndef LIBURING_VERSION_H #define LIBURING_VERSION_H #define IO_URING_VERSION_MAJOR $VERSION_MAJOR #define IO_URING_VERSION_MINOR $VERSION_MINOR #endif EOF # generate compat.h compat_h="src/include/liburing/compat.h" cat > $compat_h << EOF /* SPDX-License-Identifier: MIT */ #ifndef LIBURING_COMPAT_H #define LIBURING_COMPAT_H EOF if test "$__kernel_rwf_t" != "yes"; then cat >> $compat_h << EOF typedef int __kernel_rwf_t; EOF fi if test "$__kernel_timespec" != "yes"; then cat >> $compat_h << EOF #include struct __kernel_timespec { int64_t tv_sec; long long tv_nsec; }; /* is not available, so it can't be included */ #define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1 EOF else cat >> $compat_h << EOF #include /* is included above and not needed again */ #define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1 EOF fi if test "$open_how" != "yes"; then cat >> $compat_h << EOF #include struct open_how { uint64_t flags; uint64_t mode; uint64_t resolve; }; EOF else cat >> $compat_h << EOF #include EOF fi if [ "$glibc_statx" = "no" ] && [ "$statx" = "yes" ]; then cat >> $compat_h << EOF #include EOF fi if test "$futexv" != "yes"; then cat >> $compat_h << EOF #include #define FUTEX_32 2 #define FUTEX_WAITV_MAX 128 struct futex_waitv { uint64_t val; uint64_t 
uaddr; uint32_t flags; uint32_t __reserved; }; EOF fi if test "$has_idtype_t" != "yes"; then cat >> $compat_h << EOF typedef enum { P_ALL, /* Wait for any child. */ P_PID, /* Wait for specified process. */ P_PGID /* Wait for members of process group. */ } idtype_t; EOF fi cat >> $compat_h << EOF #endif EOF liburing-2.6/debian/000077500000000000000000000000001461424365000144235ustar00rootroot00000000000000liburing-2.6/debian/README.Debian000066400000000000000000000003541461424365000164660ustar00rootroot00000000000000liburing for Debian The newest Linux IO interface, i.e. io_uring, needs a userspace library to support it. This package, liburing, is the library for io_uring. -- Liu Changcheng Thu, 14 Nov 2019 21:35:39 +0800 liburing-2.6/debian/changelog000066400000000000000000000021531461424365000162760ustar00rootroot00000000000000liburing (2.2-1) stable; urgency=low * Update to 2.2 * Bump up so version to 2 * Drop liburing1-udeb * Package using dh instead of using dh_* helpers manually * Add linux header dependency to liburing-dev * Bump up debhelper-compact level to 13 -- Kefu Chai Sun, 16 Oct 2022 16:30:48 +0800 liburing (0.7-1) stable; urgency=low * Update to 0.7 * Fix library symlinks -- Stefan Metzmacher Thu, 23 Jul 2020 00:23:00 +0200 liburing (0.4-2) stable; urgency=low * Fix /usr/lib/*/liburing.so symlink to /lib/*/liburing.so.1.0.4 -- Stefan Metzmacher Fri, 07 Feb 2020 15:30:00 +0100 liburing (0.4-1) stable; urgency=low * Package liburing-0.4 using a packaging layout similar to libaio1 -- Stefan Metzmacher Thu, 06 Feb 2020 11:30:00 +0100 liburing (0.2-1ubuntu1) stable; urgency=low * Initial release. * commit 4bce856d43ab1f9a64477aa5a8f9f02f53e64b74 * Author: Jens Axboe * Date: Mon Nov 11 16:00:58 2019 -0700 -- Liu Changcheng Fri, 15 Nov 2019 00:06:46 +0800 liburing-2.6/debian/control000066400000000000000000000023101461424365000160220ustar00rootroot00000000000000Source: liburing Section: libs Priority: optional Maintainer: Liu Changcheng Build-Depends: debhelper-compat (= 13) Standards-Version: 4.1.4 Homepage: https://git.kernel.dk/cgit/liburing/tree/README Vcs-Git: https://git.kernel.dk/liburing Vcs-Browser: https://git.kernel.dk/cgit/liburing/ Package: liburing2 Architecture: linux-any Multi-Arch: same Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends} Description: userspace library for using io_uring io_uring is a kernel feature providing the newest Linux IO interface; it can improve system performance a lot. liburing is the userspace library for using the io_uring feature. . This package contains the shared library. Package: liburing-dev Section: libdevel Architecture: linux-any Multi-Arch: same Depends: ${misc:Depends}, liburing2 (= ${binary:Version}), linux-libc-dev (>= 5.1) Description: userspace library for using io_uring io_uring is a kernel feature providing the newest Linux IO interface; it can improve system performance a lot. liburing is the userspace library for using the io_uring feature. . This package contains the static library and the header files.
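Stepping back to the compat.h generation that the configure script above finishes emitting: its fallback definitions exist so application code can use kernel-derived types such as struct __kernel_timespec even on systems whose headers lack <linux/time_types.h>. As a hedged illustration (not code shipped in this tree; the helper name wait_one_second is made up), a function built on that type might look like the sketch below. liburing.h pulls in compat.h, so this compiles whether or not the system header exists.

/* illustrative sketch: one-second bounded wait using struct __kernel_timespec */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include "liburing.h"

/* returns 1 if a completion was reaped, 0 on timeout, <0 on error */
static int wait_one_second(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	int ret;

	ret = io_uring_wait_cqe_timeout(ring, &cqe, &ts);
	if (ret == -ETIME)
		return 0;
	if (ret < 0) {
		fprintf(stderr, "wait_cqe_timeout: %s\n", strerror(-ret));
		return ret;
	}
	io_uring_cqe_seen(ring, cqe);
	return 1;
}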
liburing-2.6/debian/copyright000066400000000000000000000041751461424365000163650ustar00rootroot00000000000000Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: liburing Source: https://git.kernel.dk/cgit/liburing/ Files: * Copyright: 2019 Jens Axboe License: GPL-2+ / MIT Files: debian/* Copyright: 2019 Changcheng Liu License: GPL-2+ License: GPL-2+ This package is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. . This package is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. . You should have received a copy of the GNU General Public License along with this program. If not, see . On Debian systems, the complete text of the GNU General Public License version 2 can be found in "/usr/share/common-licenses/GPL-2". License: MIT Copyright 2020 Jens Axboe Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
liburing-2.6/debian/liburing-dev.install000066400000000000000000000001031461424365000203740ustar00rootroot00000000000000usr/include usr/lib/*/lib*.so usr/lib/*/lib*.a usr/lib/*/pkgconfig liburing-2.6/debian/liburing-dev.manpages000066400000000000000000000002401461424365000205230ustar00rootroot00000000000000usr/share/man/man2/io_uring_*.2 usr/share/man/man3/io_uring_*.3 usr/share/man/man7/io_uring.7 usr/share/man/man3/IO_URING_*.3 usr/share/man/man3/__io_uring_*.3 liburing-2.6/debian/liburing2.install000066400000000000000000000000241461424365000177040ustar00rootroot00000000000000usr/lib/*/lib*.so.* liburing-2.6/debian/liburing2.symbols000066400000000000000000000052501461424365000177340ustar00rootroot00000000000000liburing.so.2 liburing2 #MINVER# [47/1887] LIBURING_2.0@LIBURING_2.0 0.7-1 LIBURING_2.1@LIBURING_2.1 0.7-1 LIBURING_2.2@LIBURING_2.2 0.7-1 LIBURING_2.3@LIBURING_2.3 0.7-1 __io_uring_get_cqe@LIBURING_2.0 0.7-1 __io_uring_sqring_wait@LIBURING_2.0 0.7-1 io_uring_enter2@LIBURING_2.3 0.7-1 io_uring_enter@LIBURING_2.3 0.7-1 io_uring_free_probe@LIBURING_2.0 0.7-1 io_uring_get_events@LIBURING_2.3 0.7-1 io_uring_get_probe@LIBURING_2.0 0.7-1 io_uring_get_probe_ring@LIBURING_2.0 0.7-1 io_uring_get_sqe@LIBURING_2.0 0.7-1 io_uring_mlock_size@LIBURING_2.1 0.7-1 io_uring_mlock_size_params@LIBURING_2.1 0.7-1 io_uring_peek_batch_cqe@LIBURING_2.0 0.7-1 io_uring_queue_exit@LIBURING_2.0 0.7-1 io_uring_queue_init@LIBURING_2.0 0.7-1 io_uring_queue_init_params@LIBURING_2.0 0.7-1 io_uring_queue_mmap@LIBURING_2.0 0.7-1 io_uring_register@LIBURING_2.3 0.7-1 io_uring_register_buf_ring@LIBURING_2.2 0.7-1 io_uring_register_buffers@LIBURING_2.0 0.7-1 io_uring_register_buffers_sparse@LIBURING_2.2 0.7-1 io_uring_register_buffers_tags@LIBURING_2.1 0.7-1 io_uring_register_buffers_update_tag@LIBURING_2.1 0.7-1 io_uring_register_eventfd@LIBURING_2.0 0.7-1 io_uring_register_eventfd_async@LIBURING_2.0 0.7-1 io_uring_register_file_alloc_range@LIBURING_2.3 0.7-1 io_uring_register_files@LIBURING_2.0 0.7-1 io_uring_register_files_sparse@LIBURING_2.2 0.7-1 io_uring_register_files_tags@LIBURING_2.1 0.7-1 io_uring_register_files_update@LIBURING_2.0 0.7-1 io_uring_register_files_update_tag@LIBURING_2.1 0.7-1 io_uring_register_iowq_aff@LIBURING_2.1 0.7-1 io_uring_register_iowq_max_workers@LIBURING_2.1 0.7-1 io_uring_register_personality@LIBURING_2.0 0.7-1 io_uring_register_probe@LIBURING_2.0 0.7-1 io_uring_register_ring_fd@LIBURING_2.2 0.7-1 io_uring_register_sync_cancel@LIBURING_2.3 0.7-1 io_uring_ring_dontfork@LIBURING_2.0 0.7-1 io_uring_setup@LIBURING_2.3 0.7-1 io_uring_submit@LIBURING_2.0 0.7-1 io_uring_submit_and_get_events@LIBURING_2.3 0.7-1 io_uring_submit_and_wait@LIBURING_2.0 0.7-1 io_uring_submit_and_wait_timeout@LIBURING_2.2 0.7-1 io_uring_unregister_buf_ring@LIBURING_2.2 0.7-1 io_uring_unregister_buffers@LIBURING_2.0 0.7-1 io_uring_unregister_eventfd@LIBURING_2.0 0.7-1 io_uring_unregister_files@LIBURING_2.0 0.7-1 io_uring_unregister_iowq_aff@LIBURING_2.1 0.7-1 io_uring_unregister_personality@LIBURING_2.0 0.7-1 io_uring_unregister_ring_fd@LIBURING_2.2 0.7-1 io_uring_wait_cqe_timeout@LIBURING_2.0 0.7-1 io_uring_wait_cqes@LIBURING_2.0 0.7-1 liburing-2.6/debian/patches/000077500000000000000000000000001461424365000160525ustar00rootroot00000000000000liburing-2.6/debian/patches/series000066400000000000000000000001011461424365000172570ustar00rootroot00000000000000# You must remove unused comment lines for the released package. 
liburing-2.6/debian/rules000077500000000000000000000011761461424365000155100ustar00rootroot00000000000000#!/usr/bin/make -f # Uncomment this to turn on verbose mode. #export DH_VERBOSE=1 DEB_BUILD_MAINT_OPTIONS = hardening=+bindnow DEB_CFLAGS_MAINT_PREPEND = -Wall DEB_BUILD_OPTIONS += nocheck include /usr/share/dpkg/default.mk include /usr/share/dpkg/buildtools.mk %: dh $@ --parallel override_dh_auto_configure: ./configure \ --prefix=/usr \ --includedir=/usr/include \ --datadir=/usr/share \ --mandir=/usr/share/man \ --libdir=/usr/lib/$(DEB_HOST_MULTIARCH) \ --libdevdir=/usr/lib/$(DEB_HOST_MULTIARCH) \ --cc=$(CC) override_dh_auto_test: ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) $(MAKE) runtests endif liburing-2.6/debian/source/000077500000000000000000000000001461424365000157235ustar00rootroot00000000000000liburing-2.6/debian/source/format000066400000000000000000000000141461424365000171310ustar00rootroot000000000000003.0 (quilt) liburing-2.6/debian/source/local-options000066400000000000000000000000541461424365000204300ustar00rootroot00000000000000#abort-on-upstream-changes #unapply-patches liburing-2.6/debian/source/options000066400000000000000000000001301461424365000173330ustar00rootroot00000000000000extend-diff-ignore = "(^|/)(config\.log|config-host\.h|config-host\.mak|liburing\.pc)$" liburing-2.6/debian/watch000066400000000000000000000002721461424365000154550ustar00rootroot00000000000000# Site Directory Pattern Version Script version=4 https://git.kernel.dk/cgit/liburing/ snapshot\/liburing-([\d\.]+)\.tar\.(?:gz|xz) debian uupdate liburing-2.6/examples/000077500000000000000000000000001461424365000150175ustar00rootroot00000000000000liburing-2.6/examples/Makefile000066400000000000000000000020001461424365000164470ustar00rootroot00000000000000CPPFLAGS ?= override CPPFLAGS += -D_GNU_SOURCE -I../src/include/ CFLAGS ?= -g -O2 -Wall LDFLAGS ?= override LDFLAGS += -L../src/ -luring include ../Makefile.quiet ifneq ($(MAKECMDGOALS),clean) include ../config-host.mak endif LDFLAGS ?= override LDFLAGS += -L../src/ -luring -lpthread example_srcs := \ io_uring-close-test.c \ io_uring-cp.c \ io_uring-test.c \ io_uring-udp.c \ link-cp.c \ napi-busy-poll-client.c \ napi-busy-poll-server.c \ poll-bench.c \ send-zerocopy.c \ rsrc-update-bench.c \ proxy.c all_targets := ifdef CONFIG_HAVE_UCONTEXT example_srcs += ucontext-cp.c endif all_targets += ucontext-cp helpers.o example_targets := $(patsubst %.c,%,$(patsubst %.cc,%,$(example_srcs))) all_targets += $(example_targets) helpers = helpers.o all: $(example_targets) helpers.o: helpers.c $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $< %: %.c $(helpers) ../src/liburing.a $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< $(helpers) $(LDFLAGS) clean: @rm -f $(all_targets) .PHONY: all clean liburing-2.6/examples/helpers.c000066400000000000000000000023611461424365000166270ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include #include #include "helpers.h" int setup_listening_socket(int port, int ipv6) { struct sockaddr_in srv_addr = { }; struct sockaddr_in6 srv_addr6 = { }; int fd, enable, ret, domain; if (ipv6) domain = AF_INET6; else domain = AF_INET; fd = socket(domain, SOCK_STREAM, 0); if (fd == -1) { perror("socket()"); return -1; } enable = 1; ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)); if (ret < 0) { perror("setsockopt(SO_REUSEADDR)"); return -1; } if (ipv6) { srv_addr6.sin6_family = AF_INET6; srv_addr6.sin6_port = htons(port); 
srv_addr6.sin6_addr = in6addr_any; ret = bind(fd, (const struct sockaddr *)&srv_addr6, sizeof(srv_addr6)); } else { srv_addr.sin_family = AF_INET; srv_addr.sin_port = htons(port); srv_addr.sin_addr.s_addr = htonl(INADDR_ANY); ret = bind(fd, (const struct sockaddr *)&srv_addr, sizeof(srv_addr)); } if (ret < 0) { perror("bind()"); return -1; } if (listen(fd, 1024) < 0) { perror("listen()"); return -1; } return fd; } liburing-2.6/examples/helpers.h000066400000000000000000000002301461424365000166250ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_EX_HELPERS_H #define LIBURING_EX_HELPERS_H int setup_listening_socket(int port, int ipv6); #endif liburing-2.6/examples/io_uring-close-test.c000066400000000000000000000047471461424365000210720ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Simple app that demonstrates how to setup an io_uring interface, and use it * via a registered ring fd, without leaving the original fd open. * * gcc -Wall -O2 -D_GNU_SOURCE -o io_uring-close-test io_uring-close-test.c -luring */ #include #include #include #include #include #include #include #include "liburing.h" #define QD 4 int main(int argc, char *argv[]) { struct io_uring ring; int i, fd, ret, pending, done; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec *iovecs; struct stat sb; ssize_t fsize; off_t offset; void *buf; if (argc < 2) { printf("%s: file\n", argv[0]); return 1; } ret = io_uring_queue_init(QD, &ring, 0); if (ret < 0) { fprintf(stderr, "queue_init: %s\n", strerror(-ret)); return 1; } ret = io_uring_register_ring_fd(&ring); if (ret < 0) { fprintf(stderr, "register_ring_fd: %s\n", strerror(-ret)); return 1; } ret = io_uring_close_ring_fd(&ring); if (ret < 0) { fprintf(stderr, "close_ring_fd: %s\n", strerror(-ret)); return 1; } fd = open(argv[1], O_RDONLY); if (fd < 0) { perror("open"); return 1; } if (fstat(fd, &sb) < 0) { perror("fstat"); return 1; } fsize = 0; iovecs = calloc(QD, sizeof(struct iovec)); for (i = 0; i < QD; i++) { if (posix_memalign(&buf, 4096, 4096)) return 1; iovecs[i].iov_base = buf; iovecs[i].iov_len = 4096; fsize += 4096; } offset = 0; i = 0; do { sqe = io_uring_get_sqe(&ring); if (!sqe) break; io_uring_prep_readv(sqe, fd, &iovecs[i], 1, offset); offset += iovecs[i].iov_len; i++; if (offset > sb.st_size) break; } while (1); ret = io_uring_submit(&ring); if (ret < 0) { fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret)); return 1; } else if (ret != i) { fprintf(stderr, "io_uring_submit submitted less %d\n", ret); return 1; } done = 0; pending = ret; fsize = 0; for (i = 0; i < pending; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "io_uring_wait_cqe: %s\n", strerror(-ret)); return 1; } done++; ret = 0; if (cqe->res != 4096 && cqe->res + fsize != sb.st_size) { fprintf(stderr, "ret=%d, wanted 4096\n", cqe->res); ret = 1; } fsize += cqe->res; io_uring_cqe_seen(&ring, cqe); if (ret) break; } printf("Submitted=%d, completed=%d, bytes=%lu\n", pending, done, (unsigned long) fsize); close(fd); io_uring_queue_exit(&ring); return 0; } liburing-2.6/examples/io_uring-cp.c000066400000000000000000000123161461424365000174010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * gcc -Wall -O2 -D_GNU_SOURCE -o io_uring-cp io_uring-cp.c -luring */ #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #define QD 64 #define BS (32*1024) static int infd, outfd; struct io_data { int read; off_t first_offset, offset; 
size_t first_len; struct iovec iov; }; static int setup_context(unsigned entries, struct io_uring *ring) { int ret; ret = io_uring_queue_init(entries, ring, 0); if (ret < 0) { fprintf(stderr, "queue_init: %s\n", strerror(-ret)); return -1; } return 0; } static int get_file_size(int fd, off_t *size) { struct stat st; if (fstat(fd, &st) < 0) return -1; if (S_ISREG(st.st_mode)) { *size = st.st_size; return 0; } else if (S_ISBLK(st.st_mode)) { unsigned long long bytes; if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) return -1; *size = bytes; return 0; } return -1; } static void queue_prepped(struct io_uring *ring, struct io_data *data) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); assert(sqe); if (data->read) io_uring_prep_readv(sqe, infd, &data->iov, 1, data->offset); else io_uring_prep_writev(sqe, outfd, &data->iov, 1, data->offset); io_uring_sqe_set_data(sqe, data); } static int queue_read(struct io_uring *ring, off_t size, off_t offset) { struct io_uring_sqe *sqe; struct io_data *data; data = malloc(size + sizeof(*data)); if (!data) return 1; sqe = io_uring_get_sqe(ring); if (!sqe) { free(data); return 1; } data->read = 1; data->offset = data->first_offset = offset; data->iov.iov_base = data + 1; data->iov.iov_len = size; data->first_len = size; io_uring_prep_readv(sqe, infd, &data->iov, 1, offset); io_uring_sqe_set_data(sqe, data); return 0; } static void queue_write(struct io_uring *ring, struct io_data *data) { data->read = 0; data->offset = data->first_offset; data->iov.iov_base = data + 1; data->iov.iov_len = data->first_len; queue_prepped(ring, data); io_uring_submit(ring); } static int copy_file(struct io_uring *ring, off_t insize) { unsigned long reads, writes; struct io_uring_cqe *cqe; off_t write_left, offset; int ret; write_left = insize; writes = reads = offset = 0; while (insize || write_left) { unsigned long had_reads; int got_comp; /* * Queue up as many reads as we can */ had_reads = reads; while (insize) { off_t this_size = insize; if (reads + writes >= QD) break; if (this_size > BS) this_size = BS; else if (!this_size) break; if (queue_read(ring, this_size, offset)) break; insize -= this_size; offset += this_size; reads++; } if (had_reads != reads) { ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret)); break; } } /* * Queue is full at this point. Find at least one completion. */ got_comp = 0; while (write_left) { struct io_data *data; if (!got_comp) { ret = io_uring_wait_cqe(ring, &cqe); got_comp = 1; } else { ret = io_uring_peek_cqe(ring, &cqe); if (ret == -EAGAIN) { cqe = NULL; ret = 0; } } if (ret < 0) { fprintf(stderr, "io_uring_peek_cqe: %s\n", strerror(-ret)); return 1; } if (!cqe) break; data = io_uring_cqe_get_data(cqe); if (cqe->res < 0) { if (cqe->res == -EAGAIN) { queue_prepped(ring, data); io_uring_submit(ring); io_uring_cqe_seen(ring, cqe); continue; } fprintf(stderr, "cqe failed: %s\n", strerror(-cqe->res)); return 1; } else if ((size_t)cqe->res != data->iov.iov_len) { /* Short read/write, adjust and requeue */ data->iov.iov_base += cqe->res; data->iov.iov_len -= cqe->res; data->offset += cqe->res; queue_prepped(ring, data); io_uring_submit(ring); io_uring_cqe_seen(ring, cqe); continue; } /* * All done. if write, nothing else to do. if read, * queue up corresponding write. 
*/ if (data->read) { queue_write(ring, data); write_left -= data->first_len; reads--; writes++; } else { free(data); writes--; } io_uring_cqe_seen(ring, cqe); } } /* wait out pending writes */ while (writes) { struct io_data *data; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "write res=%d\n", cqe->res); return 1; } data = io_uring_cqe_get_data(cqe); free(data); writes--; io_uring_cqe_seen(ring, cqe); } return 0; } int main(int argc, char *argv[]) { struct io_uring ring; off_t insize; int ret; if (argc < 3) { printf("%s: infile outfile\n", argv[0]); return 1; } infd = open(argv[1], O_RDONLY); if (infd < 0) { perror("open infile"); return 1; } outfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644); if (outfd < 0) { perror("open outfile"); return 1; } if (setup_context(QD, &ring)) return 1; if (get_file_size(infd, &insize)) return 1; ret = copy_file(&ring, insize); close(infd); close(outfd); io_uring_queue_exit(&ring); return ret; } liburing-2.6/examples/io_uring-test.c000066400000000000000000000043211461424365000177530ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Simple app that demonstrates how to setup an io_uring interface, * submit and complete IO against it, and then tear it down. * * gcc -Wall -O2 -D_GNU_SOURCE -o io_uring-test io_uring-test.c -luring */ #include #include #include #include #include #include #include #include "liburing.h" #define QD 4 int main(int argc, char *argv[]) { struct io_uring ring; int i, fd, ret, pending, done; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec *iovecs; struct stat sb; ssize_t fsize; off_t offset; void *buf; if (argc < 2) { printf("%s: file\n", argv[0]); return 1; } ret = io_uring_queue_init(QD, &ring, 0); if (ret < 0) { fprintf(stderr, "queue_init: %s\n", strerror(-ret)); return 1; } fd = open(argv[1], O_RDONLY | O_DIRECT); if (fd < 0) { perror("open"); return 1; } if (fstat(fd, &sb) < 0) { perror("fstat"); return 1; } fsize = 0; iovecs = calloc(QD, sizeof(struct iovec)); for (i = 0; i < QD; i++) { if (posix_memalign(&buf, 4096, 4096)) return 1; iovecs[i].iov_base = buf; iovecs[i].iov_len = 4096; fsize += 4096; } offset = 0; i = 0; do { sqe = io_uring_get_sqe(&ring); if (!sqe) break; io_uring_prep_readv(sqe, fd, &iovecs[i], 1, offset); offset += iovecs[i].iov_len; i++; if (offset >= sb.st_size) break; } while (1); ret = io_uring_submit(&ring); if (ret < 0) { fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret)); return 1; } else if (ret != i) { fprintf(stderr, "io_uring_submit submitted less %d\n", ret); return 1; } done = 0; pending = ret; fsize = 0; for (i = 0; i < pending; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "io_uring_wait_cqe: %s\n", strerror(-ret)); return 1; } done++; ret = 0; if (cqe->res != 4096 && cqe->res + fsize != sb.st_size) { fprintf(stderr, "ret=%d, wanted 4096\n", cqe->res); ret = 1; } fsize += cqe->res; io_uring_cqe_seen(&ring, cqe); if (ret) break; } printf("Submitted=%d, completed=%d, bytes=%lu\n", pending, done, (unsigned long) fsize); close(fd); io_uring_queue_exit(&ring); return 0; } liburing-2.6/examples/io_uring-udp.c000066400000000000000000000211671461424365000175730ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include "liburing.h" #define QD 64 #define BUF_SHIFT 12 /* 4k */ #define CQES (QD * 16) #define BUFFERS CQES #define CONTROLLEN 0 struct sendmsg_ctx 
{ struct msghdr msg; struct iovec iov; }; struct ctx { struct io_uring ring; struct io_uring_buf_ring *buf_ring; unsigned char *buffer_base; struct msghdr msg; int buf_shift; int af; bool verbose; struct sendmsg_ctx send[BUFFERS]; size_t buf_ring_size; }; static size_t buffer_size(struct ctx *ctx) { return 1U << ctx->buf_shift; } static unsigned char *get_buffer(struct ctx *ctx, int idx) { return ctx->buffer_base + (idx << ctx->buf_shift); } static int setup_buffer_pool(struct ctx *ctx) { int ret, i; void *mapped; struct io_uring_buf_reg reg = { .ring_addr = 0, .ring_entries = BUFFERS, .bgid = 0 }; ctx->buf_ring_size = (sizeof(struct io_uring_buf) + buffer_size(ctx)) * BUFFERS; mapped = mmap(NULL, ctx->buf_ring_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); if (mapped == MAP_FAILED) { fprintf(stderr, "buf_ring mmap: %s\n", strerror(errno)); return -1; } ctx->buf_ring = (struct io_uring_buf_ring *)mapped; io_uring_buf_ring_init(ctx->buf_ring); reg = (struct io_uring_buf_reg) { .ring_addr = (unsigned long)ctx->buf_ring, .ring_entries = BUFFERS, .bgid = 0 }; ctx->buffer_base = (unsigned char *)ctx->buf_ring + sizeof(struct io_uring_buf) * BUFFERS; ret = io_uring_register_buf_ring(&ctx->ring, ®, 0); if (ret) { fprintf(stderr, "buf_ring init failed: %s\n" "NB This requires a kernel version >= 6.0\n", strerror(-ret)); return ret; } for (i = 0; i < BUFFERS; i++) { io_uring_buf_ring_add(ctx->buf_ring, get_buffer(ctx, i), buffer_size(ctx), i, io_uring_buf_ring_mask(BUFFERS), i); } io_uring_buf_ring_advance(ctx->buf_ring, BUFFERS); return 0; } static int setup_context(struct ctx *ctx) { struct io_uring_params params; int ret; memset(¶ms, 0, sizeof(params)); params.cq_entries = QD * 8; params.flags = IORING_SETUP_SUBMIT_ALL | IORING_SETUP_COOP_TASKRUN | IORING_SETUP_CQSIZE; ret = io_uring_queue_init_params(QD, &ctx->ring, ¶ms); if (ret < 0) { fprintf(stderr, "queue_init failed: %s\n" "NB: This requires a kernel version >= 6.0\n", strerror(-ret)); return ret; } ret = setup_buffer_pool(ctx); if (ret) io_uring_queue_exit(&ctx->ring); memset(&ctx->msg, 0, sizeof(ctx->msg)); ctx->msg.msg_namelen = sizeof(struct sockaddr_storage); ctx->msg.msg_controllen = CONTROLLEN; return ret; } static int setup_sock(int af, int port) { int ret; int fd; uint16_t nport = port <= 0 ? 
0 : htons(port); fd = socket(af, SOCK_DGRAM, 0); if (fd < 0) { fprintf(stderr, "sock_init: %s\n", strerror(errno)); return -1; } if (af == AF_INET6) { struct sockaddr_in6 addr6 = { .sin6_family = af, .sin6_port = nport, .sin6_addr = IN6ADDR_ANY_INIT }; ret = bind(fd, (struct sockaddr *) &addr6, sizeof(addr6)); } else { struct sockaddr_in addr = { .sin_family = af, .sin_port = nport, .sin_addr = { INADDR_ANY } }; ret = bind(fd, (struct sockaddr *) &addr, sizeof(addr)); } if (ret) { fprintf(stderr, "sock_bind: %s\n", strerror(errno)); close(fd); return -1; } if (port <= 0) { int port; struct sockaddr_storage s; socklen_t sz = sizeof(s); if (getsockname(fd, (struct sockaddr *)&s, &sz)) { fprintf(stderr, "getsockname failed\n"); close(fd); return -1; } port = ntohs(((struct sockaddr_in *)&s)->sin_port); fprintf(stderr, "port bound to %d\n", port); } return fd; } static void cleanup_context(struct ctx *ctx) { munmap(ctx->buf_ring, ctx->buf_ring_size); io_uring_queue_exit(&ctx->ring); } static bool get_sqe(struct ctx *ctx, struct io_uring_sqe **sqe) { *sqe = io_uring_get_sqe(&ctx->ring); if (!*sqe) { io_uring_submit(&ctx->ring); *sqe = io_uring_get_sqe(&ctx->ring); } if (!*sqe) { fprintf(stderr, "cannot get sqe\n"); return true; } return false; } static int add_recv(struct ctx *ctx, int idx) { struct io_uring_sqe *sqe; if (get_sqe(ctx, &sqe)) return -1; io_uring_prep_recvmsg_multishot(sqe, idx, &ctx->msg, MSG_TRUNC); sqe->flags |= IOSQE_FIXED_FILE; sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = 0; io_uring_sqe_set_data64(sqe, BUFFERS + 1); return 0; } static void recycle_buffer(struct ctx *ctx, int idx) { io_uring_buf_ring_add(ctx->buf_ring, get_buffer(ctx, idx), buffer_size(ctx), idx, io_uring_buf_ring_mask(BUFFERS), 0); io_uring_buf_ring_advance(ctx->buf_ring, 1); } static int process_cqe_send(struct ctx *ctx, struct io_uring_cqe *cqe) { int idx = cqe->user_data; if (cqe->res < 0) fprintf(stderr, "bad send %s\n", strerror(-cqe->res)); recycle_buffer(ctx, idx); return 0; } static int process_cqe_recv(struct ctx *ctx, struct io_uring_cqe *cqe, int fdidx) { int ret, idx; struct io_uring_recvmsg_out *o; struct io_uring_sqe *sqe; if (!(cqe->flags & IORING_CQE_F_MORE)) { ret = add_recv(ctx, fdidx); if (ret) return ret; } if (cqe->res == -ENOBUFS) return 0; if (!(cqe->flags & IORING_CQE_F_BUFFER) || cqe->res < 0) { fprintf(stderr, "recv cqe bad res %d\n", cqe->res); if (cqe->res == -EFAULT || cqe->res == -EINVAL) fprintf(stderr, "NB: This requires a kernel version >= 6.0\n"); return -1; } idx = cqe->flags >> 16; o = io_uring_recvmsg_validate(get_buffer(ctx, cqe->flags >> 16), cqe->res, &ctx->msg); if (!o) { fprintf(stderr, "bad recvmsg\n"); return -1; } if (o->namelen > ctx->msg.msg_namelen) { fprintf(stderr, "truncated name\n"); recycle_buffer(ctx, idx); return 0; } if (o->flags & MSG_TRUNC) { unsigned int r; r = io_uring_recvmsg_payload_length(o, cqe->res, &ctx->msg); fprintf(stderr, "truncated msg need %u received %u\n", o->payloadlen, r); recycle_buffer(ctx, idx); return 0; } if (ctx->verbose) { struct sockaddr_in *addr = io_uring_recvmsg_name(o); struct sockaddr_in6 *addr6 = (void *)addr; char buff[INET6_ADDRSTRLEN + 1]; const char *name; void *paddr; if (ctx->af == AF_INET6) paddr = &addr6->sin6_addr; else paddr = &addr->sin_addr; name = inet_ntop(ctx->af, paddr, buff, sizeof(buff)); if (!name) name = ""; fprintf(stderr, "received %u bytes %d from [%s]:%d\n", io_uring_recvmsg_payload_length(o, cqe->res, &ctx->msg), o->namelen, name, (int)ntohs(addr->sin_port)); } if (get_sqe(ctx, &sqe)) 
return -1; ctx->send[idx].iov = (struct iovec) { .iov_base = io_uring_recvmsg_payload(o, &ctx->msg), .iov_len = io_uring_recvmsg_payload_length(o, cqe->res, &ctx->msg) }; ctx->send[idx].msg = (struct msghdr) { .msg_namelen = o->namelen, .msg_name = io_uring_recvmsg_name(o), .msg_control = NULL, .msg_controllen = 0, .msg_iov = &ctx->send[idx].iov, .msg_iovlen = 1 }; io_uring_prep_sendmsg(sqe, fdidx, &ctx->send[idx].msg, 0); io_uring_sqe_set_data64(sqe, idx); sqe->flags |= IOSQE_FIXED_FILE; return 0; } static int process_cqe(struct ctx *ctx, struct io_uring_cqe *cqe, int fdidx) { if (cqe->user_data < BUFFERS) return process_cqe_send(ctx, cqe); else return process_cqe_recv(ctx, cqe, fdidx); } int main(int argc, char *argv[]) { struct ctx ctx; int ret; int port = -1; int sockfd; int opt; struct io_uring_cqe *cqes[CQES]; unsigned int count, i; memset(&ctx, 0, sizeof(ctx)); ctx.verbose = false; ctx.af = AF_INET; ctx.buf_shift = BUF_SHIFT; while ((opt = getopt(argc, argv, "6vp:b:")) != -1) { switch (opt) { case '6': ctx.af = AF_INET6; break; case 'p': port = atoi(optarg); break; case 'b': ctx.buf_shift = atoi(optarg); break; case 'v': ctx.verbose = true; break; default: fprintf(stderr, "Usage: %s [-p port] " "[-b log2(BufferSize)] [-6] [-v]\n", argv[0]); exit(-1); } } sockfd = setup_sock(ctx.af, port); if (sockfd < 0) return 1; if (setup_context(&ctx)) { close(sockfd); return 1; } ret = io_uring_register_files(&ctx.ring, &sockfd, 1); if (ret) { fprintf(stderr, "register files: %s\n", strerror(-ret)); return -1; } ret = add_recv(&ctx, 0); if (ret) return 1; while (true) { ret = io_uring_submit_and_wait(&ctx.ring, 1); if (ret == -EINTR) continue; if (ret < 0) { fprintf(stderr, "submit and wait failed %d\n", ret); break; } count = io_uring_peek_batch_cqe(&ctx.ring, &cqes[0], CQES); for (i = 0; i < count; i++) { ret = process_cqe(&ctx, cqes[i], 0); if (ret) goto cleanup; } io_uring_cq_advance(&ctx.ring, count); } cleanup: cleanup_context(&ctx); close(sockfd); return ret; } liburing-2.6/examples/link-cp.c000066400000000000000000000067321461424365000165300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Very basic proof-of-concept for doing a copy with linked SQEs. Needs a * bit of error handling and short read love. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #define QD 64 #define BS (32*1024) struct io_data { size_t offset; int index; struct iovec iov; }; static int infd, outfd; static int inflight; static int setup_context(unsigned entries, struct io_uring *ring) { int ret; ret = io_uring_queue_init(entries, ring, 0); if (ret < 0) { fprintf(stderr, "queue_init: %s\n", strerror(-ret)); return -1; } return 0; } static int get_file_size(int fd, off_t *size) { struct stat st; if (fstat(fd, &st) < 0) return -1; if (S_ISREG(st.st_mode)) { *size = st.st_size; return 0; } else if (S_ISBLK(st.st_mode)) { unsigned long long bytes; if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) return -1; *size = bytes; return 0; } return -1; } static void queue_rw_pair(struct io_uring *ring, off_t size, off_t offset) { struct io_uring_sqe *sqe; struct io_data *data; void *ptr; ptr = malloc(size + sizeof(*data)); data = ptr + size; data->index = 0; data->offset = offset; data->iov.iov_base = ptr; data->iov.iov_len = size; sqe = io_uring_get_sqe(ring); io_uring_prep_readv(sqe, infd, &data->iov, 1, offset); sqe->flags |= IOSQE_IO_LINK; io_uring_sqe_set_data(sqe, data); sqe = io_uring_get_sqe(ring); io_uring_prep_writev(sqe, outfd, &data->iov, 1, offset); io_uring_sqe_set_data(sqe, data); } static int handle_cqe(struct io_uring *ring, struct io_uring_cqe *cqe) { struct io_data *data = io_uring_cqe_get_data(cqe); int ret = 0; data->index++; if (cqe->res < 0) { if (cqe->res == -ECANCELED) { queue_rw_pair(ring, data->iov.iov_len, data->offset); inflight += 2; } else { printf("cqe error: %s\n", strerror(-cqe->res)); ret = 1; } } if (data->index == 2) { void *ptr = (void *) data - data->iov.iov_len; free(ptr); } io_uring_cqe_seen(ring, cqe); return ret; } static int copy_file(struct io_uring *ring, off_t insize) { struct io_uring_cqe *cqe; off_t this_size; off_t offset; offset = 0; while (insize) { int has_inflight = inflight; int depth; while (insize && inflight < QD) { this_size = BS; if (this_size > insize) this_size = insize; queue_rw_pair(ring, this_size, offset); offset += this_size; insize -= this_size; inflight += 2; } if (has_inflight != inflight) io_uring_submit(ring); if (insize) depth = QD; else depth = 1; while (inflight >= depth) { int ret; ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait cqe: %s\n", strerror(-ret)); return 1; } if (handle_cqe(ring, cqe)) return 1; inflight--; } } return 0; } int main(int argc, char *argv[]) { struct io_uring ring; off_t insize; int ret; if (argc < 3) { printf("%s: infile outfile\n", argv[0]); return 1; } infd = open(argv[1], O_RDONLY); if (infd < 0) { perror("open infile"); return 1; } outfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644); if (outfd < 0) { perror("open outfile"); return 1; } if (setup_context(QD, &ring)) return 1; if (get_file_size(infd, &insize)) return 1; ret = copy_file(&ring, insize); close(infd); close(outfd); io_uring_queue_exit(&ring); return ret; } liburing-2.6/examples/napi-busy-poll-client.c000066400000000000000000000255761461424365000213310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Simple ping/pong client which can use the io_uring NAPI support. * * Needs to be run as root because it sets SCHED_FIFO scheduling class, * but will work without that. * * Example: * * sudo examples/napi-busy-poll-client -a 192.168.2.2 -n100000 -p4444 \ * -b -t10 -u * * send and receive 100k packets, using NAPI. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAXBUFLEN 100 #define PORTNOLEN 10 #define ADDRLEN 80 #define RINGSIZE 1024 #define printable(ch) (isprint((unsigned char)ch) ? ch : '#') enum { IOURING_RECV, IOURING_SEND, IOURING_RECVMSG, IOURING_SENDMSG }; struct ctx { struct io_uring ring; union { struct sockaddr_in6 saddr6; struct sockaddr_in saddr; }; int sockfd; int buffer_len; int num_pings; bool napi_check; union { char buffer[MAXBUFLEN]; struct timespec ts; }; int rtt_index; double *rtt; }; struct options { int num_pings; __u32 timeout; bool sq_poll; bool defer_tw; bool busy_loop; bool prefer_busy_poll; bool ipv6; char port[PORTNOLEN]; char addr[ADDRLEN]; }; static struct option longopts[] = { {"address" , 1, NULL, 'a'}, {"busy" , 0, NULL, 'b'}, {"help" , 0, NULL, 'h'}, {"num_pings", 1, NULL, 'n'}, {"port" , 1, NULL, 'p'}, {"prefer" , 1, NULL, 'u'}, {"sqpoll" , 0, NULL, 's'}, {"timeout" , 1, NULL, 't'}, {NULL , 0, NULL, 0 } }; static void printUsage(const char *name) { fprintf(stderr, "Usage: %s [-l|--listen] [-a|--address ip_address] [-p|--port port-no] [-s|--sqpoll]" " [-b|--busy] [-n|--num pings] [-t|--timeout busy-poll-timeout] [-u||--prefer] [-6] [-h|--help]\n" "--address\n" "-a : remote or local ipv6 address\n" "--busy\n" "-b : busy poll io_uring instead of blocking.\n" "--num_pings\n" "-n : number of pings\n" "--port\n" "-p : port\n" "--sqpoll\n" "-s : Configure io_uring to use SQPOLL thread\n" "--timeout\n" "-t : Configure NAPI busy poll timeout" "--prefer\n" "-u : prefer NAPI busy poll\n" "-6 : use IPV6\n" "--help\n" "-h : Display this usage message\n\n", name); } static void printError(const char *msg, int opt) { if (msg && opt) fprintf(stderr, "%s (-%c)\n", msg, printable(opt)); } static void setProcessScheduler(void) { struct sched_param param; param.sched_priority = sched_get_priority_max(SCHED_FIFO); if (sched_setscheduler(0, SCHED_FIFO, ¶m) < 0) fprintf(stderr, "sched_setscheduler() failed: (%d) %s\n", errno, strerror(errno)); } static double diffTimespec(const struct timespec *time1, const struct timespec *time0) { return (time1->tv_sec - time0->tv_sec) + (time1->tv_nsec - time0->tv_nsec) / 1000000000.0; } static uint64_t encodeUserData(char type, int fd) { return (uint32_t)fd | ((uint64_t)type << 56); } static void decodeUserData(uint64_t data, char *type, int *fd) { *type = data >> 56; *fd = data & 0xffffffffU; } static const char *opTypeToStr(char type) { const char *res; switch (type) { case IOURING_RECV: res = "IOURING_RECV"; break; case IOURING_SEND: res = "IOURING_SEND"; break; case IOURING_RECVMSG: res = "IOURING_RECVMSG"; break; case IOURING_SENDMSG: res = "IOURING_SENDMSG"; break; default: res = "Unknown"; } return res; } static void reportNapi(struct ctx *ctx) { unsigned int napi_id = 0; socklen_t len = sizeof(napi_id); getsockopt(ctx->sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len); if (napi_id) printf(" napi id: %d\n", napi_id); else printf(" unassigned napi id\n"); ctx->napi_check = true; } static void sendPing(struct ctx *ctx) { struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring); clock_gettime(CLOCK_REALTIME, (struct timespec *)ctx->buffer); io_uring_prep_send(sqe, ctx->sockfd, ctx->buffer, sizeof(struct timespec), 0); sqe->user_data = encodeUserData(IOURING_SEND, ctx->sockfd); } static void receivePing(struct ctx *ctx) { struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring); io_uring_prep_recv(sqe, ctx->sockfd, 
ctx->buffer, MAXBUFLEN, 0); sqe->user_data = encodeUserData(IOURING_RECV, ctx->sockfd); } static void recordRTT(struct ctx *ctx) { struct timespec startTs = ctx->ts; // Send next ping. sendPing(ctx); // Store round-trip time. ctx->rtt[ctx->rtt_index] = diffTimespec(&ctx->ts, &startTs); ctx->rtt_index++; } static void printStats(struct ctx *ctx) { double minRTT = DBL_MAX; double maxRTT = 0.0; double avgRTT = 0.0; double stddevRTT = 0.0; // Calculate min, max, avg. for (int i = 0; i < ctx->rtt_index; i++) { if (ctx->rtt[i] < minRTT) minRTT = ctx->rtt[i]; if (ctx->rtt[i] > maxRTT) maxRTT = ctx->rtt[i]; avgRTT += ctx->rtt[i]; } avgRTT /= ctx->rtt_index; // Calculate stddev. for (int i = 0; i < ctx->rtt_index; i++) stddevRTT += fabs(ctx->rtt[i] - avgRTT); stddevRTT /= ctx->rtt_index; fprintf(stdout, " rtt(us) min/avg/max/mdev = %.3f/%.3f/%.3f/%.3f\n", minRTT * 1000000, avgRTT * 1000000, maxRTT * 1000000, stddevRTT * 1000000); } static int completion(struct ctx *ctx, struct io_uring_cqe *cqe) { char type; int fd; int res = cqe->res; decodeUserData(cqe->user_data, &type, &fd); if (res < 0) { fprintf(stderr, "unexpected %s failure: (%d) %s\n", opTypeToStr(type), -res, strerror(-res)); return -1; } switch (type) { case IOURING_SEND: receivePing(ctx); break; case IOURING_RECV: if (res != sizeof(struct timespec)) { fprintf(stderr, "unexpected ping reply len: %d\n", res); abort(); } if (!ctx->napi_check) { reportNapi(ctx); sendPing(ctx); } else { recordRTT(ctx); } --ctx->num_pings; break; default: fprintf(stderr, "unexpected %s completion\n", opTypeToStr(type)); return -1; break; } return 0; } int main(int argc, char *argv[]) { struct ctx ctx; struct options opt; struct __kernel_timespec *tsPtr; struct __kernel_timespec ts; struct io_uring_params params; struct io_uring_napi napi; int flag, ret, af; memset(&opt, 0, sizeof(struct options)); // Process flags. while ((flag = getopt_long(argc, argv, ":hs:bua:n:p:t:6d:", longopts, NULL)) != -1) { switch (flag) { case 'a': strcpy(opt.addr, optarg); break; case 'b': opt.busy_loop = true; break; case 'h': printUsage(argv[0]); exit(0); break; case 'n': opt.num_pings = atoi(optarg) + 1; break; case 'p': strcpy(opt.port, optarg); break; case 's': opt.sq_poll = !!atoi(optarg); break; case 't': opt.timeout = atoi(optarg); break; case 'u': opt.prefer_busy_poll = true; break; case '6': opt.ipv6 = true; break; case 'd': opt.defer_tw = !!atoi(optarg); break; case ':': printError("Missing argument", optopt); printUsage(argv[0]); exit(-1); break; case '?': printError("Unrecognized option", optopt); printUsage(argv[0]); exit(-1); break; default: fprintf(stderr, "Fatal: Unexpected case in CmdLineProcessor switch()\n"); exit(-1); break; } } if (strlen(opt.addr) == 0) { fprintf(stderr, "address option is mandatory\n"); printUsage(argv[0]); exit(1); } if (opt.ipv6) { af = AF_INET6; ctx.saddr6.sin6_port = htons(atoi(opt.port)); ctx.saddr6.sin6_family = AF_INET6; } else { af = AF_INET; ctx.saddr.sin_port = htons(atoi(opt.port)); ctx.saddr.sin_family = AF_INET; } if (opt.ipv6) ret = inet_pton(af, opt.addr, &ctx.saddr6.sin6_addr); else ret = inet_pton(af, opt.addr, &ctx.saddr.sin_addr); if (ret <= 0) { fprintf(stderr, "inet_pton error for %s\n", optarg); printUsage(argv[0]); exit(1); } // Connect to server. fprintf(stdout, "Connecting to %s... 
(port=%s) to send %d pings\n", opt.addr, opt.port, opt.num_pings - 1);

	if ((ctx.sockfd = socket(af, SOCK_DGRAM, 0)) < 0) {
		fprintf(stderr, "socket() failed: (%d) %s\n", errno, strerror(errno));
		exit(1);
	}
	if (opt.ipv6)
		ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr6, sizeof(struct sockaddr_in6));
	else
		ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr, sizeof(struct sockaddr_in));
	if (ret < 0) {
		fprintf(stderr, "connect() failed: (%d) %s\n", errno, strerror(errno));
		exit(1);
	}

	// Setup ring.
	memset(&params, 0, sizeof(params));
	memset(&ts, 0, sizeof(ts));
	memset(&napi, 0, sizeof(napi));

	params.flags = IORING_SETUP_SINGLE_ISSUER;
	if (opt.defer_tw) {
		params.flags |= IORING_SETUP_DEFER_TASKRUN;
	} else if (opt.sq_poll) {
		params.flags = IORING_SETUP_SQPOLL;
		params.sq_thread_idle = 50;
	} else {
		params.flags |= IORING_SETUP_COOP_TASKRUN;
	}
	ret = io_uring_queue_init_params(RINGSIZE, &ctx.ring, &params);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init_params() failed: (%d) %s\n",
			ret, strerror(-ret));
		exit(1);
	}

	if (opt.timeout || opt.prefer_busy_poll) {
		napi.prefer_busy_poll = opt.prefer_busy_poll;
		napi.busy_poll_to = opt.timeout;

		ret = io_uring_register_napi(&ctx.ring, &napi);
		if (ret) {
			fprintf(stderr, "io_uring_register_napi: %d\n", ret);
			exit(1);
		}
	}

	if (opt.busy_loop)
		tsPtr = &ts;
	else
		tsPtr = NULL;

	// Use realtime scheduler.
	setProcessScheduler();

	// Copy payload.
	clock_gettime(CLOCK_REALTIME, &ctx.ts);

	// Setup context.
	ctx.napi_check = false;
	ctx.buffer_len = sizeof(struct timespec);
	ctx.num_pings = opt.num_pings;
	ctx.rtt_index = 0;
	ctx.rtt = (double *)malloc(sizeof(double) * opt.num_pings);
	if (!ctx.rtt) {
		fprintf(stderr, "Cannot allocate results array\n");
		exit(1);
	}

	// Send initial message to get napi id.
	sendPing(&ctx);

	while (ctx.num_pings != 0) {
		int res;
		unsigned num_completed = 0;
		unsigned head;
		struct io_uring_cqe *cqe;

		do {
			res = io_uring_submit_and_wait_timeout(&ctx.ring, &cqe, 1, tsPtr, NULL);
			if (res >= 0)
				break;
			else if (res == -ETIME)
				continue;
			fprintf(stderr, "submit_and_wait: %d\n", res);
			exit(1);
		} while (1);

		io_uring_for_each_cqe(&ctx.ring, head, cqe) {
			++num_completed;
			if (completion(&ctx, cqe))
				goto out;
		}
		if (num_completed)
			io_uring_cq_advance(&ctx.ring, num_completed);
	}

	printStats(&ctx);

out:
	// Clean up.
	if (opt.timeout || opt.prefer_busy_poll) {
		ret = io_uring_unregister_napi(&ctx.ring, &napi);
		if (ret)
			fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
		if (opt.timeout != napi.busy_poll_to ||
		    opt.prefer_busy_poll != napi.prefer_busy_poll) {
			fprintf(stderr, "Expected busy poll to = %d, got %d\n",
				opt.timeout, napi.busy_poll_to);
			fprintf(stderr, "Expected prefer busy poll = %d, got %d\n",
				opt.prefer_busy_poll, napi.prefer_busy_poll);
		}
	} else {
		ret = io_uring_unregister_napi(&ctx.ring, NULL);
		if (ret)
			fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
	}
	io_uring_queue_exit(&ctx.ring);
	free(ctx.rtt);
	close(ctx.sockfd);
	return 0;
}
liburing-2.6/examples/napi-busy-poll-server.c000066400000000000000000000226731461424365000213520ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */
/*
 * Simple ping/pong backend which can use the io_uring NAPI support.
 *
 * Needs to be run as root because it sets SCHED_FIFO scheduling class,
 * but will work without that.
 *
 * Example:
 *
 * sudo examples/napi-busy-poll-server -l -a 192.168.2.2 -n100000 \
 *	-p4444 -t10 -b -u
 *
 * will respond to 100k packets, using NAPI.
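 *
 * The matching initiator is examples/napi-busy-poll-client.c. For the
 * server invocation above, a plausible client command line (illustrative
 * only, built from the client's option set) would be:
 *
 * sudo examples/napi-busy-poll-client -a 192.168.2.2 -n100000 \
 *	-p4444 -t10 -b -u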
 */
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <sched.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <liburing.h>

#define MAXBUFLEN 100
#define PORTNOLEN 10
#define ADDRLEN   80
#define RINGSIZE  1024

#define printable(ch) (isprint((unsigned char)ch) ? ch : '#')

enum {
	IOURING_RECV,
	IOURING_SEND,
	IOURING_RECVMSG,
	IOURING_SENDMSG
};

struct ctx {
	struct io_uring ring;
	union {
		struct sockaddr_in6 saddr6;
		struct sockaddr_in saddr;
	};
	struct iovec iov;
	struct msghdr msg;
	int sockfd;
	int buffer_len;
	int num_pings;
	bool napi_check;
	union {
		char buffer[MAXBUFLEN];
		struct timespec ts;
	};
};

struct options {
	int num_pings;
	__u32 timeout;
	bool listen;
	bool defer_tw;
	bool sq_poll;
	bool busy_loop;
	bool prefer_busy_poll;
	bool ipv6;
	char port[PORTNOLEN];
	char addr[ADDRLEN];
};

static struct options opt;

static struct option longopts[] = {
	{"address"  , 1, NULL, 'a'},
	{"busy"     , 0, NULL, 'b'},
	{"help"     , 0, NULL, 'h'},
	{"listen"   , 0, NULL, 'l'},
	{"num_pings", 1, NULL, 'n'},
	{"port"     , 1, NULL, 'p'},
	{"prefer"   , 0, NULL, 'u'},
	{"sqpoll"   , 1, NULL, 's'},
	{"timeout"  , 1, NULL, 't'},
	{NULL       , 0, NULL,  0 }
};

static void printUsage(const char *name)
{
	fprintf(stderr,
	"Usage: %s [-l|--listen] [-a|--address ip_address] [-p|--port port-no] [-s|--sqpoll]"
	" [-b|--busy] [-n|--num pings] [-t|--timeout busy-poll-timeout] [-u|--prefer] [-6] [-h|--help]\n"
	"--listen\n"
	"-l : Server mode\n"
	"--address\n"
	"-a : remote or local ipv6 address\n"
	"--busy\n"
	"-b : busy poll io_uring instead of blocking.\n"
	"--num_pings\n"
	"-n : number of pings\n"
	"--port\n"
	"-p : port\n"
	"--sqpoll\n"
	"-s : Configure io_uring to use SQPOLL thread\n"
	"--timeout\n"
	"-t : Configure NAPI busy poll timeout\n"
	"--prefer\n"
	"-u : prefer NAPI busy poll\n"
	"-6 : use IPV6\n"
	"--help\n"
	"-h : Display this usage message\n\n",
	name);
}

static void printError(const char *msg, int opt)
{
	if (msg && opt)
		fprintf(stderr, "%s (-%c)\n", msg, printable(opt));
}

static void setProcessScheduler(void)
{
	struct sched_param param;

	param.sched_priority = sched_get_priority_max(SCHED_FIFO);
	if (sched_setscheduler(0, SCHED_FIFO, &param) < 0)
		fprintf(stderr, "sched_setscheduler() failed: (%d) %s\n",
			errno, strerror(errno));
}

static uint64_t encodeUserData(char type, int fd)
{
	return (uint32_t)fd | ((__u64)type << 56);
}

static void decodeUserData(uint64_t data, char *type, int *fd)
{
	*type = data >> 56;
	*fd   = data & 0xffffffffU;
}

static const char *opTypeToStr(char type)
{
	const char *res;

	switch (type) {
	case IOURING_RECV:
		res = "IOURING_RECV";
		break;
	case IOURING_SEND:
		res = "IOURING_SEND";
		break;
	case IOURING_RECVMSG:
		res = "IOURING_RECVMSG";
		break;
	case IOURING_SENDMSG:
		res = "IOURING_SENDMSG";
		break;
	default:
		res = "Unknown";
	}
	return res;
}

static void reportNapi(struct ctx *ctx)
{
	unsigned int napi_id = 0;
	socklen_t len = sizeof(napi_id);

	getsockopt(ctx->sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len);
	if (napi_id)
		printf(" napi id: %d\n", napi_id);
	else
		printf(" unassigned napi id\n");

	ctx->napi_check = true;
}

static void sendPing(struct ctx *ctx)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);

	io_uring_prep_sendmsg(sqe, ctx->sockfd, &ctx->msg, 0);
	sqe->user_data = encodeUserData(IOURING_SENDMSG, ctx->sockfd);
}

static void receivePing(struct ctx *ctx)
{
	struct io_uring_sqe *sqe;

	bzero(&ctx->msg, sizeof(struct msghdr));
	if (opt.ipv6) {
		ctx->msg.msg_name = &ctx->saddr6;
		ctx->msg.msg_namelen = sizeof(struct sockaddr_in6);
	} else {
		ctx->msg.msg_name = &ctx->saddr;
		ctx->msg.msg_namelen = sizeof(struct sockaddr_in);
	}
ctx->iov.iov_base = ctx->buffer; ctx->iov.iov_len = MAXBUFLEN; ctx->msg.msg_iov = &ctx->iov; ctx->msg.msg_iovlen = 1; sqe = io_uring_get_sqe(&ctx->ring); io_uring_prep_recvmsg(sqe, ctx->sockfd, &ctx->msg, 0); sqe->user_data = encodeUserData(IOURING_RECVMSG, ctx->sockfd); } static void completion(struct ctx *ctx, struct io_uring_cqe *cqe) { char type; int fd; int res = cqe->res; decodeUserData(cqe->user_data, &type, &fd); if (res < 0) { fprintf(stderr, "unexpected %s failure: (%d) %s\n", opTypeToStr(type), -res, strerror(-res)); abort(); } switch (type) { case IOURING_SENDMSG: receivePing(ctx); --ctx->num_pings; break; case IOURING_RECVMSG: ctx->iov.iov_len = res; sendPing(ctx); if (!ctx->napi_check) reportNapi(ctx); break; default: fprintf(stderr, "unexpected %s completion\n", opTypeToStr(type)); abort(); break; } } int main(int argc, char *argv[]) { int flag; struct ctx ctx; struct __kernel_timespec *tsPtr; struct __kernel_timespec ts; struct io_uring_params params; struct io_uring_napi napi; int ret, af; memset(&opt, 0, sizeof(struct options)); // Process flags. while ((flag = getopt_long(argc, argv, ":lhs:bua:n:p:t:6d:", longopts, NULL)) != -1) { switch (flag) { case 'a': strcpy(opt.addr, optarg); break; case 'b': opt.busy_loop = true; break; case 'h': printUsage(argv[0]); exit(0); break; case 'l': opt.listen = true; break; case 'n': opt.num_pings = atoi(optarg) + 1; break; case 'p': strcpy(opt.port, optarg); break; case 's': opt.sq_poll = !!atoi(optarg); break; case 't': opt.timeout = atoi(optarg); break; case 'u': opt.prefer_busy_poll = true; break; case '6': opt.ipv6 = true; break; case 'd': opt.defer_tw = !!atoi(optarg); break; case ':': printError("Missing argument", optopt); printUsage(argv[0]); exit(-1); break; case '?': printError("Unrecognized option", optopt); printUsage(argv[0]); exit(-1); break; default: fprintf(stderr, "Fatal: Unexpected case in CmdLineProcessor switch()\n"); exit(-1); break; } } if (strlen(opt.addr) == 0) { fprintf(stderr, "address option is mandatory\n"); printUsage(argv[0]); exit(1); } if (opt.ipv6) { af = AF_INET6; ctx.saddr6.sin6_port = htons(atoi(opt.port)); ctx.saddr6.sin6_family = AF_INET6; } else { af = AF_INET; ctx.saddr.sin_port = htons(atoi(opt.port)); ctx.saddr.sin_family = AF_INET; } if (opt.ipv6) ret = inet_pton(AF_INET6, opt.addr, &ctx.saddr6.sin6_addr); else ret = inet_pton(AF_INET, opt.addr, &ctx.saddr.sin_addr); if (ret <= 0) { fprintf(stderr, "inet_pton error for %s\n", optarg); printUsage(argv[0]); exit(1); } // Connect to server. fprintf(stdout, "Listening %s : %s...\n", opt.addr, opt.port); if ((ctx.sockfd = socket(af, SOCK_DGRAM, 0)) < 0) { fprintf(stderr, "socket() failed: (%d) %s\n", errno, strerror(errno)); exit(1); } if (opt.ipv6) ret = bind(ctx.sockfd, (struct sockaddr *)&ctx.saddr6, sizeof(struct sockaddr_in6)); else ret = bind(ctx.sockfd, (struct sockaddr *)&ctx.saddr, sizeof(struct sockaddr_in)); if (ret < 0) { fprintf(stderr, "bind() failed: (%d) %s\n", errno, strerror(errno)); exit(1); } // Setup ring. 
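	// Ring setup below mirrors the client: SINGLE_ISSUER is always set
	// since only this thread submits SQEs; -d adds DEFER_TASKRUN so
	// completions only run when we wait, -s swaps in an SQPOLL thread,
	// and the default falls back to COOP_TASKRUN.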
	memset(&params, 0, sizeof(params));
	memset(&ts, 0, sizeof(ts));
	memset(&napi, 0, sizeof(napi));

	params.flags = IORING_SETUP_SINGLE_ISSUER;
	if (opt.defer_tw) {
		params.flags |= IORING_SETUP_DEFER_TASKRUN;
	} else if (opt.sq_poll) {
		params.flags = IORING_SETUP_SQPOLL;
		params.sq_thread_idle = 50;
	} else {
		params.flags |= IORING_SETUP_COOP_TASKRUN;
	}
	ret = io_uring_queue_init_params(RINGSIZE, &ctx.ring, &params);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init_params() failed: (%d) %s\n",
			ret, strerror(-ret));
		exit(1);
	}

	if (opt.timeout || opt.prefer_busy_poll) {
		napi.prefer_busy_poll = opt.prefer_busy_poll;
		napi.busy_poll_to = opt.timeout;

		ret = io_uring_register_napi(&ctx.ring, &napi);
		if (ret) {
			fprintf(stderr, "io_uring_register_napi: %d\n", ret);
			exit(1);
		}
	}

	if (opt.busy_loop)
		tsPtr = &ts;
	else
		tsPtr = NULL;

	// Use realtime scheduler.
	setProcessScheduler();

	// Copy payload.
	clock_gettime(CLOCK_REALTIME, &ctx.ts);

	// Setup context.
	ctx.napi_check = false;
	ctx.buffer_len = sizeof(struct timespec);
	ctx.num_pings = opt.num_pings;

	// Receive initial message to get napi id.
	receivePing(&ctx);

	while (ctx.num_pings != 0) {
		int res;
		unsigned int num_completed = 0;
		unsigned int head;
		struct io_uring_cqe *cqe;

		do {
			res = io_uring_submit_and_wait_timeout(&ctx.ring, &cqe, 1, tsPtr, NULL);
			if (res >= 0)
				break;
			else if (res == -ETIME)
				continue;
			fprintf(stderr, "submit_and_wait: %d\n", res);
			exit(1);
		} while (1);

		io_uring_for_each_cqe(&ctx.ring, head, cqe) {
			++num_completed;
			completion(&ctx, cqe);
		}
		if (num_completed)
			io_uring_cq_advance(&ctx.ring, num_completed);
	}

	// Clean up.
	if (opt.timeout || opt.prefer_busy_poll) {
		ret = io_uring_unregister_napi(&ctx.ring, &napi);
		if (ret)
			fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
	}
	io_uring_queue_exit(&ctx.ring);
	close(ctx.sockfd);
	return 0;
}
liburing-2.6/examples/poll-bench.c000066400000000000000000000041211461424365000172060ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */
#include <errno.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>
#include "liburing.h"

static char buf[4096];
static unsigned long runtime_ms = 10000;

static unsigned long gettimeofday_ms(void)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);
	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
}

int main(void)
{
	unsigned long tstop;
	unsigned long nr_reqs = 0;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	int pipe1[2];
	int ret, i, qd = 32;

	if (pipe(pipe1) != 0) {
		perror("pipe");
		return 1;
	}

	ret = io_uring_queue_init(1024, &ring, IORING_SETUP_SINGLE_ISSUER);
	if (ret == -EINVAL) {
		fprintf(stderr, "can't single\n");
		ret = io_uring_queue_init(1024, &ring, 0);
	}
	if (ret) {
		fprintf(stderr, "child: ring setup failed: %d\n", ret);
		return 1;
	}

	ret = io_uring_register_files(&ring, pipe1, 2);
	if (ret < 0) {
		fprintf(stderr, "io_uring_register_files failed\n");
		return 1;
	}

	ret = io_uring_register_ring_fd(&ring);
	if (ret < 0) {
		fprintf(stderr, "io_uring_register_ring_fd failed\n");
		return 1;
	}

	tstop = gettimeofday_ms() + runtime_ms;
	do {
		for (i = 0; i < qd; i++) {
			sqe = io_uring_get_sqe(&ring);
			io_uring_prep_poll_add(sqe, 0, POLLIN);
			sqe->flags |= IOSQE_FIXED_FILE;
			sqe->user_data = 1;
		}

		ret = io_uring_submit(&ring);
		if (ret != qd) {
			fprintf(stderr, "child: sqe submit failed: %d\n", ret);
			return 1;
		}

		ret = write(pipe1[1], buf, 1);
		if (ret != 1) {
			fprintf(stderr, "write failed %i\n", errno);
			return 1;
		}
		ret = read(pipe1[0], buf, 1);
		if (ret != 1) {
			fprintf(stderr, "read failed %i\n", errno);
			return 1;
		}

		for (i = 0; i < qd; i++) {
			ret =
io_uring_wait_cqe(&ring, &cqe);
			if (ret < 0) {
				fprintf(stderr, "child: wait completion %d\n", ret);
				break;
			}
			io_uring_cqe_seen(&ring, cqe);
			nr_reqs++;
		}
	} while (gettimeofday_ms() < tstop);

	fprintf(stderr, "requests/s: %lu\n", nr_reqs * 1000UL / runtime_ms);
	return 0;
}
liburing-2.6/examples/proxy.c000066400000000000000000001733571461424365000163550ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */
/*
 * Sample program that can act either as a packet sink, where it just receives
 * packets and doesn't do anything with them, or it can act as a proxy where it
 * receives packets and then sends them to a new destination. The proxy can
 * be unidirectional (-B0), or bi-directional (-B1).
 *
 * Examples:
 *
 * Act as a proxy, listening on port 4444, and send data to 192.168.2.6 on port
 * 4445. Use multishot receive, DEFER_TASKRUN, and fixed files
 *
 *	./proxy -m1 -r4444 -H 192.168.2.6 -p4445
 *
 * Same as above, but utilize send bundles (-C1, requires -u1 send_ring) as well
 * with ring provided send buffers, and recv bundles (-c1).
 *
 *	./proxy -m1 -c1 -u1 -C1 -r4444 -H 192.168.2.6 -p4445
 *
 * Act as a bi-directional proxy, listening on port 8888, and send data back
 * and forth between host and 192.168.2.6 on port 22. Use multishot receive,
 * DEFER_TASKRUN, fixed files, and buffers of size 1500.
 *
 *	./proxy -m1 -B1 -b1500 -r8888 -H 192.168.2.6 -p22
 *
 * Act as a sink, listening on port 4445, using multishot receive,
 * DEFER_TASKRUN, and fixed files:
 *
 *	./proxy -m1 -s1 -r4445
 *
 * Run with -h to see a list of options, and their defaults.
 *
 * (C) 2024 Jens Axboe <axboe@kernel.dk>
 *
 */
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <locale.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>

#include "proxy.h"
#include "helpers.h"

/*
 * Will go away once/if bundles are upstreamed and we put the generic
 * definitions in the kernel header.
 */
#ifndef IORING_RECVSEND_BUNDLE
#define IORING_RECVSEND_BUNDLE		(1U << 4)
#endif
#ifndef IORING_FEAT_SEND_BUF_SELECT
#define IORING_FEAT_SEND_BUF_SELECT	(1U << 14)
#endif

static int cur_bgid = 1;
static int nr_conns;
static int open_conns;
static long page_size;

static unsigned long event_loops;
static unsigned long events;

static int recv_mshot = 1;
static int sqpoll;
static int defer_tw = 1;
static int is_sink;
static int fixed_files = 1;
static char *host = "192.168.3.2";
static int send_port = 4445;
static int receive_port = 4444;
static int buf_size = 32;
static int bidi;
static int ipv6;
static int napi;
static int napi_timeout;
static int wait_batch = 1;
static int wait_usec = 1000000;
static int rcv_msg;
static int snd_msg;
static int snd_zc;
static int send_ring = -1;
static int snd_bundle;
static int rcv_bundle;
static int use_huge;
static int ext_stat;
static int verbose;

static int nr_bufs = 256;
static int br_mask;

static int ring_size = 128;

static pthread_mutex_t thread_lock;
static struct timeval last_housekeeping;

/*
 * For sendmsg/recvmsg. recvmsg just has a single vec, sendmsg will have
 * two vecs - one that is currently submitted and being sent, and one that
 * is being prepared. When a new sendmsg is issued, we'll swap which one we
 * use. For send, even though we don't pass in the iovec itself, we use the
 * vec to serialize the sends to avoid reordering.
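 *
 * A rough lifecycle sketch (the indices are illustrative only):
 *
 *	vecs[0]: handed to sendmsg(), currently in flight
 *	vecs[1]: being filled by incoming receives in the meantime
 *
 * When the next send is issued, prep_next_send() flips vec_index
 * (imsg->vec_index = !imsg->vec_index) and the two roles swap.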
 */
struct msg_vec {
	struct iovec *iov;
	/* length of allocated vec */
	int vec_size;
	/* length currently being used */
	int iov_len;
	/* only for send, current index we're processing */
	int cur_iov;
};

struct io_msg {
	struct msghdr msg;

	struct msg_vec vecs[2];

	/* current msg_vec being prepared */
	int vec_index;
};

/*
 * Per socket stats per connection. For bi-directional, we'll have both
 * sends and receives on each socket, this helps track them separately.
 * For sink or one directional, each of the two stats will be only sends
 * or receives, not both.
 */
struct conn_dir {
	int index;

	int pending_shutdown;
	int pending_send;
	int pending_recv;

	int snd_notif;

	int out_buffers;

	int rcv, rcv_shrt, rcv_enobufs, rcv_mshot;
	int snd, snd_shrt, snd_enobufs, snd_busy, snd_mshot;

	int snd_next_bid;
	int rcv_next_bid;

	int *rcv_bucket;
	int *snd_bucket;

	unsigned long in_bytes, out_bytes;

	/* only ever have a single recv pending */
	struct io_msg io_rcv_msg;

	/* one send that is inflight, and one being prepared for the next one */
	struct io_msg io_snd_msg;
};

enum {
	CONN_F_STARTED			= 1,
	CONN_F_DISCONNECTING		= 2,
	CONN_F_DISCONNECTED		= 4,
	CONN_F_PENDING_SHUTDOWN		= 8,
	CONN_F_STATS_SHOWN		= 16,
	CONN_F_END_TIME			= 32,
	CONN_F_REAPED			= 64,
};

/*
 * buffer ring belonging to a connection
 */
struct conn_buf_ring {
	struct io_uring_buf_ring *br;
	void *buf;
	int bgid;
};

struct conn {
	struct io_uring ring;

	/* receive side buffer ring, new data arrives here */
	struct conn_buf_ring in_br;
	/* if send_ring is used, outgoing data to send */
	struct conn_buf_ring out_br;

	int tid;
	int in_fd, out_fd;
	int pending_cancels;
	int flags;

	struct conn_dir cd[2];

	struct timeval start_time, end_time;

	union {
		struct sockaddr_in addr;
		struct sockaddr_in6 addr6;
	};

	pthread_t thread;
	pthread_barrier_t startup_barrier;
};

#define MAX_CONNS	1024
static struct conn conns[MAX_CONNS];

#define vlog(str, ...) do {						\
	if (verbose)							\
		printf(str, ##__VA_ARGS__);				\
} while (0)

static int prep_next_send(struct io_uring *ring, struct conn *c,
			  struct conn_dir *cd, int fd);
static void *thread_main(void *data);

static struct conn *cqe_to_conn(struct io_uring_cqe *cqe)
{
	struct userdata ud = { .val = cqe->user_data };

	return &conns[ud.op_tid & TID_MASK];
}

static struct conn_dir *cqe_to_conn_dir(struct conn *c,
					struct io_uring_cqe *cqe)
{
	int fd = cqe_to_fd(cqe);

	return &c->cd[fd != c->in_fd];
}

static int other_dir_fd(struct conn *c, int fd)
{
	if (c->in_fd == fd)
		return c->out_fd;
	return c->in_fd;
}

/* currently active msg_vec */
static struct msg_vec *msg_vec(struct io_msg *imsg)
{
	return &imsg->vecs[imsg->vec_index];
}

static struct msg_vec *snd_msg_vec(struct conn_dir *cd)
{
	return msg_vec(&cd->io_snd_msg);
}

/*
 * Goes from accept new connection -> create socket, connect to end
 * point, prepare recv, on receive do send (unless sink). If either ends
 * disconnects, we transition to shutdown and then close.
 */
enum {
	__ACCEPT	= 1,
	__SOCK		= 2,
	__CONNECT	= 3,
	__RECV		= 4,
	__RECVMSG	= 5,
	__SEND		= 6,
	__SENDMSG	= 7,
	__SHUTDOWN	= 8,
	__CANCEL	= 9,
	__CLOSE		= 10,
	__FD_PASS	= 11,
	__NOP		= 12,
	__STOP		= 13,
};

struct error_handler {
	const char *name;
	int (*error_fn)(struct error_handler *, struct io_uring *,
			struct io_uring_cqe *);
};

static int recv_error(struct error_handler *err, struct io_uring *ring,
		      struct io_uring_cqe *cqe);
static int send_error(struct error_handler *err, struct io_uring *ring,
		      struct io_uring_cqe *cqe);

static int default_error(struct error_handler *err,
			 struct io_uring __attribute__((__unused__)) *ring,
			 struct io_uring_cqe *cqe)
{
	struct conn *c = cqe_to_conn(cqe);

	fprintf(stderr, "%d: %s error %s\n", c->tid, err->name, strerror(-cqe->res));
	fprintf(stderr, "fd=%d, bid=%d\n", cqe_to_fd(cqe), cqe_to_bid(cqe));
	return 1;
}

/*
 * Move error handling out of the normal handling path, cleanly separating
 * them. If an opcode doesn't need any error handling, set it to NULL. If
 * it wants to stop the connection at that point and not do anything else,
 * then the default handler can be used. Only receive has proper error
 * handling, as we can get -ENOBUFS which is not a fatal condition. It just
 * means we need to wait on buffer replenishing before re-arming the receive.
 */
static struct error_handler error_handlers[] = {
	{ .name = "NULL",	.error_fn = NULL, },
	{ .name = "ACCEPT",	.error_fn = default_error, },
	{ .name = "SOCK",	.error_fn = default_error, },
	{ .name = "CONNECT",	.error_fn = default_error, },
	{ .name = "RECV",	.error_fn = recv_error, },
	{ .name = "RECVMSG",	.error_fn = recv_error, },
	{ .name = "SEND",	.error_fn = send_error, },
	{ .name = "SENDMSG",	.error_fn = send_error, },
	{ .name = "SHUTDOWN",	.error_fn = NULL, },
	{ .name = "CANCEL",	.error_fn = NULL, },
	{ .name = "CLOSE",	.error_fn = NULL, },
	{ .name = "FD_PASS",	.error_fn = default_error, },
	{ .name = "NOP",	.error_fn = NULL, },
	{ .name = "STOP",	.error_fn = default_error, },
};

static void free_buffer_ring(struct io_uring *ring, struct conn_buf_ring *cbr)
{
	if (!cbr->br)
		return;

	io_uring_free_buf_ring(ring, cbr->br, nr_bufs, cbr->bgid);
	cbr->br = NULL;
	if (use_huge)
		munmap(cbr->buf, buf_size * nr_bufs);
	else
		free(cbr->buf);
}

static void free_buffer_rings(struct io_uring *ring, struct conn *c)
{
	free_buffer_ring(ring, &c->in_br);
	free_buffer_ring(ring, &c->out_br);
}

/*
 * Setup a ring provided buffer ring for each connection. If we get -ENOBUFS
 * on receive, for multishot receive we'll wait for half the provided buffers
 * to be returned by pending sends, then re-arm the multishot receive. If
 * this happens too frequently (see enobufs= stat), then the ring size is
 * likely too small. Use -nXX to make it bigger. See recv_enobufs().
 *
 * The alternative here would be to use the older style provided buffers,
 * where you simply setup a buffer group and use SQEs with
 * io_uring_prep_provide_buffers() to add to the pool. But that approach is
 * slower and has been deprecated by using the faster ring provided buffers.
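 *
 * For reference, a minimal sketch of that older style (assuming an sqe,
 * a 'bufs' allocation, and a buffer group ID 'bgid') would have been:
 *
 *	io_uring_prep_provide_buffers(sqe, bufs, buf_size, nr_bufs, bgid, 0);
 *
 * whereas the ring provided buffers set up below are registered once and
 * then replenished with io_uring_buf_ring_add() and
 * io_uring_buf_ring_advance().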
*/ static int setup_recv_ring(struct io_uring *ring, struct conn *c) { struct conn_buf_ring *cbr = &c->in_br; int ret, i; size_t len; void *ptr; len = buf_size * nr_bufs; if (use_huge) { cbr->buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_HUGETLB|MAP_HUGE_2MB|MAP_ANONYMOUS, -1, 0); if (cbr->buf == MAP_FAILED) { perror("mmap"); return 1; } } else { if (posix_memalign(&cbr->buf, page_size, len)) { perror("posix memalign"); return 1; } } cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, 0, &ret); if (!cbr->br) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } ptr = cbr->buf; for (i = 0; i < nr_bufs; i++) { vlog("%d: add bid %d, data %p\n", c->tid, i, ptr); io_uring_buf_ring_add(cbr->br, ptr, buf_size, i, br_mask, i); ptr += buf_size; } io_uring_buf_ring_advance(cbr->br, nr_bufs); printf("%d: recv buffer ring bgid %d, bufs %d\n", c->tid, cbr->bgid, nr_bufs); return 0; } /* * If 'send_ring' is used and the kernel supports it, we can skip serializing * sends as the data will be ordered regardless. This reduces the send handling * complexity, as buffers can always be added to the outgoing ring and will be * processed in the order in which they were added. */ static int setup_send_ring(struct io_uring *ring, struct conn *c) { struct conn_buf_ring *cbr = &c->out_br; int ret; cbr->br = io_uring_setup_buf_ring(ring, nr_bufs, cbr->bgid, 0, &ret); if (!cbr->br) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } printf("%d: send buffer ring bgid %d, bufs %d\n", c->tid, cbr->bgid, nr_bufs); return 0; } static int setup_send_zc(struct io_uring *ring, struct conn *c) { struct iovec *iovs; void *buf; int i, ret; if (snd_msg) return 0; buf = c->in_br.buf; iovs = calloc(nr_bufs, sizeof(struct iovec)); for (i = 0; i < nr_bufs; i++) { iovs[i].iov_base = buf; iovs[i].iov_len = buf_size; buf += buf_size; } ret = io_uring_register_buffers(ring, iovs, nr_bufs); if (ret) { fprintf(stderr, "failed registering buffers: %d\n", ret); free(iovs); return ret; } free(iovs); return 0; } /* * Setup an input and output buffer ring. 
*/ static int setup_buffer_rings(struct io_uring *ring, struct conn *c) { int ret; /* no locking needed on cur_bgid, parent serializes setup */ c->in_br.bgid = cur_bgid++; c->out_br.bgid = cur_bgid++; c->out_br.br = NULL; ret = setup_recv_ring(ring, c); if (ret) return ret; if (is_sink) return 0; if (snd_zc) { ret = setup_send_zc(ring, c); if (ret) return ret; } if (send_ring) { ret = setup_send_ring(ring, c); if (ret) { free_buffer_ring(ring, &c->in_br); return ret; } } return 0; } static void show_buckets(struct conn_dir *cd) { int i; if (!cd->rcv_bucket || !cd->snd_bucket) return; printf("\t Packets per recv/send:\n"); for (i = 0; i < nr_bufs; i++) { if (!cd->rcv_bucket[i] && !cd->snd_bucket[i]) continue; printf("\t bucket(%3d): rcv=%u snd=%u\n", i, cd->rcv_bucket[i], cd->snd_bucket[i]); } } static void __show_stats(struct conn *c) { unsigned long msec, qps; unsigned long bytes, bw; struct conn_dir *cd; int i; if (c->flags & (CONN_F_STATS_SHOWN | CONN_F_REAPED)) return; if (!(c->flags & CONN_F_STARTED)) return; if (!(c->flags & CONN_F_END_TIME)) gettimeofday(&c->end_time, NULL); msec = (c->end_time.tv_sec - c->start_time.tv_sec) * 1000; msec += (c->end_time.tv_usec - c->start_time.tv_usec) / 1000; qps = 0; for (i = 0; i < 2; i++) qps += c->cd[i].rcv + c->cd[i].snd; if (!qps) return; if (msec) qps = (qps * 1000) / msec; printf("Conn %d/(in_fd=%d, out_fd=%d): qps=%lu, msec=%lu\n", c->tid, c->in_fd, c->out_fd, qps, msec); bytes = 0; for (i = 0; i < 2; i++) { cd = &c->cd[i]; if (!cd->in_bytes && !cd->out_bytes && !cd->snd && !cd->rcv) continue; bytes += cd->in_bytes; bytes += cd->out_bytes; printf("\t%3d: rcv=%u (short=%u, enobufs=%d), snd=%u (short=%u," " busy=%u, enobufs=%d)\n", i, cd->rcv, cd->rcv_shrt, cd->rcv_enobufs, cd->snd, cd->snd_shrt, cd->snd_busy, cd->snd_enobufs); printf("\t : in_bytes=%lu (Kb %lu), out_bytes=%lu (Kb %lu)\n", cd->in_bytes, cd->in_bytes >> 10, cd->out_bytes, cd->out_bytes >> 10); printf("\t : mshot_rcv=%d, mshot_snd=%d\n", cd->rcv_mshot, cd->snd_mshot); show_buckets(cd); } if (msec) { bytes *= 8UL; bw = bytes / 1000; bw /= msec; printf("\tBW=%'luMbit\n", bw); } c->flags |= CONN_F_STATS_SHOWN; } static void show_stats(void) { float events_per_loop = 0.0; static int stats_shown; int i; if (stats_shown) return; if (events) events_per_loop = (float) events / (float) event_loops; printf("Event loops: %lu, events %lu, events per loop %.2f\n", event_loops, events, events_per_loop); for (i = 0; i < MAX_CONNS; i++) { struct conn *c = &conns[i]; __show_stats(c); } stats_shown = 1; } static void sig_int(int __attribute__((__unused__)) sig) { printf("\n"); show_stats(); exit(1); } /* * Special cased for SQPOLL only, as we don't control when SQEs are consumed if * that is used. Hence we may need to wait for the SQPOLL thread to keep up * until we can get a new SQE. All other cases will break immediately, with a * fresh SQE. * * If we grossly undersized our SQ ring, getting a NULL sqe can happen even * for the !SQPOLL case if we're handling a lot of CQEs in our event loop * and multishot isn't used. We can do io_uring_submit() to flush what we * have here. Only caveat here is that if linked requests are used, SQEs * would need to be allocated upfront as a link chain is only valid within * a single submission cycle. 
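 * (queue_shutdown_close() below takes exactly that precaution: it grabs
 * both SQEs for its shutdown -> close link upfront, before preparing
 * either of them.)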
*/ static struct io_uring_sqe *get_sqe(struct io_uring *ring) { struct io_uring_sqe *sqe; do { sqe = io_uring_get_sqe(ring); if (sqe) break; if (!sqpoll) io_uring_submit(ring); else io_uring_sqring_wait(ring); } while (1); return sqe; } /* * See __encode_userdata() for how we encode sqe->user_data, which is passed * back as cqe->user_data at completion time. */ static void encode_userdata(struct io_uring_sqe *sqe, struct conn *c, int op, int bid, int fd) { __encode_userdata(sqe, c->tid, op, bid, fd); } static void __submit_receive(struct io_uring *ring, struct conn *c, struct conn_dir *cd, int fd) { struct conn_buf_ring *cbr = &c->in_br; struct io_uring_sqe *sqe; vlog("%d: submit receive fd=%d\n", c->tid, fd); assert(!cd->pending_recv); cd->pending_recv = 1; /* * For both recv and multishot receive, we use the ring provided * buffers. These are handed to the application ahead of time, and * are consumed when a receive triggers. Note that the address and * length of the receive are set to NULL/0, and we assign the * sqe->buf_group to tell the kernel which buffer group ID to pick * a buffer from. Finally, IOSQE_BUFFER_SELECT is set to tell the * kernel that we want a buffer picked for this request, we are not * passing one in with the request. */ sqe = get_sqe(ring); if (rcv_msg) { struct io_msg *imsg = &cd->io_rcv_msg; struct msghdr *msg = &imsg->msg; memset(msg, 0, sizeof(*msg)); msg->msg_iov = msg_vec(imsg)->iov; msg->msg_iovlen = msg_vec(imsg)->iov_len; if (recv_mshot) { cd->rcv_mshot++; io_uring_prep_recvmsg_multishot(sqe, fd, &imsg->msg, 0); } else { io_uring_prep_recvmsg(sqe, fd, &imsg->msg, 0); } } else { if (recv_mshot) { cd->rcv_mshot++; io_uring_prep_recv_multishot(sqe, fd, NULL, 0, 0); } else { io_uring_prep_recv(sqe, fd, NULL, 0, 0); } } encode_userdata(sqe, c, __RECV, 0, fd); sqe->buf_group = cbr->bgid; sqe->flags |= IOSQE_BUFFER_SELECT; if (fixed_files) sqe->flags |= IOSQE_FIXED_FILE; if (rcv_bundle) sqe->ioprio |= IORING_RECVSEND_BUNDLE; } /* * One directional just arms receive on our in_fd */ static void submit_receive(struct io_uring *ring, struct conn *c) { __submit_receive(ring, c, &c->cd[0], c->in_fd); } /* * Bi-directional arms receive on both in and out fd */ static void submit_bidi_receive(struct io_uring *ring, struct conn *c) { __submit_receive(ring, c, &c->cd[0], c->in_fd); __submit_receive(ring, c, &c->cd[1], c->out_fd); } /* * We hit -ENOBUFS, which means that we ran out of buffers in our current * provided buffer group. This can happen if there's an imbalance between the * receives coming in and the sends being processed, particularly with multishot * receive as they can trigger very quickly. If this happens, defer arming a * new receive until we've replenished half of the buffer pool by processing * pending sends. */ static void recv_enobufs(struct io_uring *ring, struct conn *c, struct conn_dir *cd, int fd) { vlog("%d: enobufs hit\n", c->tid); cd->rcv_enobufs++; /* * If we're a sink, mark rcv as rearm. If we're not, then mark us as * needing a rearm for receive and send. The completing send will * kick the recv rearm. */ if (!is_sink) { int do_recv_arm = 1; if (!cd->pending_send) do_recv_arm = !prep_next_send(ring, c, cd, fd); if (do_recv_arm) __submit_receive(ring, c, &c->cd[0], c->in_fd); } else { __submit_receive(ring, c, &c->cd[0], c->in_fd); } } /* * Kill this socket - submit a shutdown and link a close to it. We don't * care about shutdown status, so mark it as not needing to post a CQE unless * it fails. 
*/ static void queue_shutdown_close(struct io_uring *ring, struct conn *c, int fd) { struct io_uring_sqe *sqe1, *sqe2; /* * On the off chance that we run out of SQEs after the first one, * grab two upfront. This it to prevent our link not working if * get_sqe() ends up doing submissions to free up an SQE, as links * are not valid across separate submissions. */ sqe1 = get_sqe(ring); sqe2 = get_sqe(ring); io_uring_prep_shutdown(sqe1, fd, SHUT_RDWR); if (fixed_files) sqe1->flags |= IOSQE_FIXED_FILE; sqe1->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS; encode_userdata(sqe1, c, __SHUTDOWN, 0, fd); if (fixed_files) io_uring_prep_close_direct(sqe2, fd); else io_uring_prep_close(sqe2, fd); encode_userdata(sqe2, c, __CLOSE, 0, fd); } /* * This connection is going away, queue a cancel for any pending recv, for * example, we have pending for this ring. For completeness, we issue a cancel * for any request we have pending for both in_fd and out_fd. */ static void queue_cancel(struct io_uring *ring, struct conn *c) { struct io_uring_sqe *sqe; int flags = 0; if (fixed_files) flags |= IORING_ASYNC_CANCEL_FD_FIXED; sqe = get_sqe(ring); io_uring_prep_cancel_fd(sqe, c->in_fd, flags); encode_userdata(sqe, c, __CANCEL, 0, c->in_fd); c->pending_cancels++; if (c->out_fd != -1) { sqe = get_sqe(ring); io_uring_prep_cancel_fd(sqe, c->out_fd, flags); encode_userdata(sqe, c, __CANCEL, 0, c->out_fd); c->pending_cancels++; } io_uring_submit(ring); } static int pending_shutdown(struct conn *c) { return c->cd[0].pending_shutdown + c->cd[1].pending_shutdown; } static bool should_shutdown(struct conn *c) { int i; if (!pending_shutdown(c)) return false; if (is_sink) return true; if (!bidi) return c->cd[0].in_bytes == c->cd[1].out_bytes; for (i = 0; i < 2; i++) { if (c->cd[0].rcv != c->cd[1].snd) return false; if (c->cd[1].rcv != c->cd[0].snd) return false; } return true; } /* * Close this connection - send a ring message to the connection with intent * to stop. When the client gets the message, it will initiate the stop. */ static void __close_conn(struct io_uring *ring, struct conn *c) { struct io_uring_sqe *sqe; uint64_t user_data; printf("Client %d: queueing stop\n", c->tid); user_data = __raw_encode(c->tid, __STOP, 0, 0); sqe = io_uring_get_sqe(ring); io_uring_prep_msg_ring(sqe, c->ring.ring_fd, 0, user_data, 0); encode_userdata(sqe, c, __NOP, 0, 0); io_uring_submit(ring); } static void close_cd(struct conn *c, struct conn_dir *cd) { cd->pending_shutdown = 1; if (cd->pending_send) return; if (!(c->flags & CONN_F_PENDING_SHUTDOWN)) { gettimeofday(&c->end_time, NULL); c->flags |= CONN_F_PENDING_SHUTDOWN | CONN_F_END_TIME; } } /* * We're done with this buffer, add it back to our pool so the kernel is * free to use it again. */ static int replenish_buffer(struct conn_buf_ring *cbr, int bid, int offset) { void *this_buf = cbr->buf + bid * buf_size; assert(bid < nr_bufs); io_uring_buf_ring_add(cbr->br, this_buf, buf_size, bid, br_mask, offset); return buf_size; } /* * Iterate buffers from '*bid' and with a total size of 'bytes' and add them * back to our receive ring so they can be reused for new receives. 
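 *
 * Note that the wrap-around arithmetic used below, and elsewhere in this
 * file, '*bid = (*bid + 1) & (nr_bufs - 1)', assumes nr_bufs is a power
 * of two.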
*/ static int replenish_buffers(struct conn *c, int *bid, int bytes) { struct conn_buf_ring *cbr = &c->in_br; int nr_packets = 0; while (bytes) { int this_len = replenish_buffer(cbr, *bid, nr_packets); if (this_len > bytes) this_len = bytes; bytes -= this_len; *bid = (*bid + 1) & (nr_bufs - 1); nr_packets++; } io_uring_buf_ring_advance(cbr->br, nr_packets); return nr_packets; } static void free_mvec(struct msg_vec *mvec) { free(mvec->iov); mvec->iov = NULL; } static void init_mvec(struct msg_vec *mvec) { memset(mvec, 0, sizeof(*mvec)); mvec->iov = malloc(sizeof(struct iovec)); mvec->vec_size = 1; } static void init_msgs(struct conn_dir *cd) { memset(&cd->io_snd_msg, 0, sizeof(cd->io_snd_msg)); memset(&cd->io_rcv_msg, 0, sizeof(cd->io_rcv_msg)); init_mvec(&cd->io_snd_msg.vecs[0]); init_mvec(&cd->io_snd_msg.vecs[1]); init_mvec(&cd->io_rcv_msg.vecs[0]); } static void free_msgs(struct conn_dir *cd) { free_mvec(&cd->io_snd_msg.vecs[0]); free_mvec(&cd->io_snd_msg.vecs[1]); free_mvec(&cd->io_rcv_msg.vecs[0]); } /* * Multishot accept completion triggered. If we're acting as a sink, we're * good to go. Just issue a receive for that case. If we're acting as a proxy, * then start opening a socket that we can use to connect to the other end. */ static int handle_accept(struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c; int i; if (nr_conns == MAX_CONNS) { fprintf(stderr, "max clients reached %d\n", nr_conns); return 1; } /* main thread handles this, which is obviously serialized */ c = &conns[nr_conns]; c->tid = nr_conns++; c->in_fd = -1; c->out_fd = -1; for (i = 0; i < 2; i++) { struct conn_dir *cd = &c->cd[i]; cd->index = i; cd->snd_next_bid = -1; cd->rcv_next_bid = -1; if (ext_stat) { cd->rcv_bucket = calloc(nr_bufs, sizeof(int)); cd->snd_bucket = calloc(nr_bufs, sizeof(int)); } init_msgs(cd); } printf("New client: id=%d, in=%d\n", c->tid, c->in_fd); gettimeofday(&c->start_time, NULL); pthread_barrier_init(&c->startup_barrier, NULL, 2); pthread_create(&c->thread, NULL, thread_main, c); /* * Wait for thread to have its ring setup, then either assign the fd * if it's non-fixed, or pass the fixed one */ pthread_barrier_wait(&c->startup_barrier); if (!fixed_files) { c->in_fd = cqe->res; } else { struct io_uring_sqe *sqe; uint64_t user_data; /* * Ring has just been setup, we'll use index 0 as the descriptor * value. */ user_data = __raw_encode(c->tid, __FD_PASS, 0, 0); sqe = io_uring_get_sqe(ring); io_uring_prep_msg_ring_fd(sqe, c->ring.ring_fd, cqe->res, 0, user_data, 0); encode_userdata(sqe, c, __NOP, 0, cqe->res); } return 0; } /* * Our socket request completed, issue a connect request to the other end. 
*/ static int handle_sock(struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); struct io_uring_sqe *sqe; int ret; vlog("%d: sock: res=%d\n", c->tid, cqe->res); c->out_fd = cqe->res; if (ipv6) { memset(&c->addr6, 0, sizeof(c->addr6)); c->addr6.sin6_family = AF_INET6; c->addr6.sin6_port = htons(send_port); ret = inet_pton(AF_INET6, host, &c->addr6.sin6_addr); } else { memset(&c->addr, 0, sizeof(c->addr)); c->addr.sin_family = AF_INET; c->addr.sin_port = htons(send_port); ret = inet_pton(AF_INET, host, &c->addr.sin_addr); } if (ret <= 0) { if (!ret) fprintf(stderr, "host not in right format\n"); else perror("inet_pton"); return 1; } sqe = get_sqe(ring); if (ipv6) { io_uring_prep_connect(sqe, c->out_fd, (struct sockaddr *) &c->addr6, sizeof(c->addr6)); } else { io_uring_prep_connect(sqe, c->out_fd, (struct sockaddr *) &c->addr, sizeof(c->addr)); } encode_userdata(sqe, c, __CONNECT, 0, c->out_fd); if (fixed_files) sqe->flags |= IOSQE_FIXED_FILE; return 0; } /* * Connection to the other end is done, submit a receive to start receiving * data. If we're a bidirectional proxy, issue a receive on both ends. If not, * then just a single recv will do. */ static int handle_connect(struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); pthread_mutex_lock(&thread_lock); open_conns++; pthread_mutex_unlock(&thread_lock); if (bidi) submit_bidi_receive(ring, c); else submit_receive(ring, c); return 0; } /* * Append new segment to our currently active msg_vec. This will be submitted * as a sendmsg (with all of it), or as separate sends, later. If we're using * send_ring, then we won't hit this path. Instead, outgoing buffers are * added directly to our outgoing send buffer ring. */ static void send_append_vec(struct conn_dir *cd, void *data, int len) { struct msg_vec *mvec = snd_msg_vec(cd); if (mvec->iov_len == mvec->vec_size) { mvec->vec_size <<= 1; mvec->iov = realloc(mvec->iov, mvec->vec_size * sizeof(struct iovec)); } mvec->iov[mvec->iov_len].iov_base = data; mvec->iov[mvec->iov_len].iov_len = len; mvec->iov_len++; } /* * Queue a send based on the data received in this cqe, which came from * a completed receive operation. */ static void send_append(struct conn *c, struct conn_dir *cd, void *data, int bid, int len) { vlog("%d: send %d (%p, bid %d)\n", c->tid, len, data, bid); assert(bid < nr_bufs); /* if using provided buffers for send, add it upfront */ if (send_ring) { struct conn_buf_ring *cbr = &c->out_br; io_uring_buf_ring_add(cbr->br, data, len, bid, br_mask, 0); io_uring_buf_ring_advance(cbr->br, 1); } else { send_append_vec(cd, data, len); } } /* * For non recvmsg && multishot, a zero receive marks the end. For recvmsg * with multishot, we always get the header regardless. Hence a "zero receive" * is the size of the header. */ static int recv_done_res(int res) { if (!res) return 1; if (rcv_msg && recv_mshot && res == sizeof(struct io_uring_recvmsg_out)) return 1; return 0; } /* * Any receive that isn't recvmsg with multishot can be handled the same way. * Iterate from '*bid' and 'in_bytes' in total, and append the data to the * outgoing queue. 
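 *
 * A single completion may span several provided buffers: each full
 * buffer contributes buf->len bytes, and only the final buffer is
 * clamped to the remaining 'in_bytes'.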
*/ static int recv_bids(struct conn *c, struct conn_dir *cd, int *bid, int in_bytes) { struct conn_buf_ring *cbr = &c->out_br; struct conn_buf_ring *in_cbr = &c->in_br; struct io_uring_buf *buf; int nr_packets = 0; while (in_bytes) { int this_bytes; void *data; buf = &in_cbr->br->bufs[*bid]; data = (void *) (unsigned long) buf->addr; this_bytes = buf->len; if (this_bytes > in_bytes) this_bytes = in_bytes; in_bytes -= this_bytes; if (send_ring) io_uring_buf_ring_add(cbr->br, data, this_bytes, *bid, br_mask, nr_packets); else send_append(c, cd, data, *bid, this_bytes); *bid = (*bid + 1) & (nr_bufs - 1); nr_packets++; } if (send_ring) io_uring_buf_ring_advance(cbr->br, nr_packets); return nr_packets; } /* * Special handling of recvmsg with multishot */ static int recv_mshot_msg(struct conn *c, struct conn_dir *cd, int *bid, int in_bytes) { struct conn_buf_ring *cbr = &c->out_br; struct conn_buf_ring *in_cbr = &c->in_br; struct io_uring_buf *buf; int nr_packets = 0; while (in_bytes) { struct io_uring_recvmsg_out *pdu; int this_bytes; void *data; buf = &in_cbr->br->bufs[*bid]; /* * multishot recvmsg puts a header in front of the data - we * have to take that into account for the send setup, and * adjust the actual data read to not take this metadata into * account. For this use case, namelen and controllen will not * be set. If they were, they would need to be factored in too. */ buf->len -= sizeof(struct io_uring_recvmsg_out); in_bytes -= sizeof(struct io_uring_recvmsg_out); pdu = (void *) (unsigned long) buf->addr; vlog("pdu namelen %d, controllen %d, payload %d flags %x\n", pdu->namelen, pdu->controllen, pdu->payloadlen, pdu->flags); data = (void *) (pdu + 1); this_bytes = pdu->payloadlen; if (this_bytes > in_bytes) this_bytes = in_bytes; in_bytes -= this_bytes; if (send_ring) io_uring_buf_ring_add(cbr->br, data, this_bytes, *bid, br_mask, nr_packets); else send_append(c, cd, data, *bid, this_bytes); *bid = (*bid + 1) & (nr_bufs - 1); nr_packets++; } if (send_ring) io_uring_buf_ring_advance(cbr->br, nr_packets); return nr_packets; } static int __handle_recv(struct io_uring *ring, struct conn *c, struct conn_dir *cd, struct io_uring_cqe *cqe) { struct conn_dir *ocd = &c->cd[!cd->index]; int bid, nr_packets; /* * Not having a buffer attached should only happen if we get a zero * sized receive, because the other end closed the connection. It * cannot happen otherwise, as all our receives are using provided * buffers and hence it's not possible to return a CQE with a non-zero * result and not have a buffer attached. */ if (!(cqe->flags & IORING_CQE_F_BUFFER)) { cd->pending_recv = 0; if (!recv_done_res(cqe->res)) { fprintf(stderr, "no buffer assigned, res=%d\n", cqe->res); return 1; } start_close: prep_next_send(ring, c, ocd, other_dir_fd(c, cqe_to_fd(cqe))); close_cd(c, cd); return 0; } if (cqe->res && cqe->res < buf_size) cd->rcv_shrt++; bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT; /* * BIDI will use the same buffer pool and do receive on both CDs, * so can't reliably check. TODO. */ if (!bidi && cd->rcv_next_bid != -1 && bid != cd->rcv_next_bid) { fprintf(stderr, "recv bid %d, wanted %d\n", bid, cd->rcv_next_bid); goto start_close; } vlog("%d: recv: bid=%d, res=%d, cflags=%x\n", c->tid, bid, cqe->res, cqe->flags); /* * If we're a sink, we're done here. Just replenish the buffer back * to the pool. For proxy mode, we will send the data to the other * end and the buffer will be replenished once the send is done with * it. 
*/ if (is_sink) nr_packets = replenish_buffers(c, &bid, cqe->res); else if (rcv_msg && recv_mshot) nr_packets = recv_mshot_msg(c, ocd, &bid, cqe->res); else nr_packets = recv_bids(c, ocd, &bid, cqe->res); if (cd->rcv_bucket) cd->rcv_bucket[nr_packets]++; if (!is_sink) { ocd->out_buffers += nr_packets; assert(ocd->out_buffers <= nr_bufs); } cd->rcv++; cd->rcv_next_bid = bid; /* * If IORING_CQE_F_MORE isn't set, then this is either a normal recv * that needs rearming, or it's a multishot that won't post any further * completions. Setup a new one for these cases. */ if (!(cqe->flags & IORING_CQE_F_MORE)) { cd->pending_recv = 0; if (recv_done_res(cqe->res)) goto start_close; if (is_sink) __submit_receive(ring, c, &c->cd[0], c->in_fd); } /* * Submit a send if we won't get anymore notifications from this * recv, or if we have nr_bufs / 2 queued up. If BIDI mode, send * every buffer. We assume this is interactive mode, and hence don't * delay anything. */ if (((!ocd->pending_send && (bidi || (ocd->out_buffers >= nr_bufs / 2))) || !(cqe->flags & IORING_CQE_F_MORE)) && !is_sink) prep_next_send(ring, c, ocd, other_dir_fd(c, cqe_to_fd(cqe))); if (!recv_done_res(cqe->res)) cd->in_bytes += cqe->res; return 0; } static int handle_recv(struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); struct conn_dir *cd = cqe_to_conn_dir(c, cqe); return __handle_recv(ring, c, cd, cqe); } static int recv_error(struct error_handler *err, struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); struct conn_dir *cd = cqe_to_conn_dir(c, cqe); cd->pending_recv = 0; if (cqe->res != -ENOBUFS) return default_error(err, ring, cqe); recv_enobufs(ring, c, cd, other_dir_fd(c, cqe_to_fd(cqe))); return 0; } static void submit_send(struct io_uring *ring, struct conn *c, struct conn_dir *cd, int fd, void *data, int len, int bid, int flags) { struct io_uring_sqe *sqe; int bgid = c->out_br.bgid; if (cd->pending_send) return; cd->pending_send = 1; flags |= MSG_WAITALL | MSG_NOSIGNAL; sqe = get_sqe(ring); if (snd_msg) { struct io_msg *imsg = &cd->io_snd_msg; if (snd_zc) { io_uring_prep_sendmsg_zc(sqe, fd, &imsg->msg, flags); cd->snd_notif++; } else { io_uring_prep_sendmsg(sqe, fd, &imsg->msg, flags); } } else if (send_ring) { io_uring_prep_send(sqe, fd, NULL, 0, flags); } else if (!snd_zc) { io_uring_prep_send(sqe, fd, data, len, flags); } else { io_uring_prep_send_zc(sqe, fd, data, len, flags, 0); sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; sqe->buf_index = bid; cd->snd_notif++; } encode_userdata(sqe, c, __SEND, bid, fd); if (fixed_files) sqe->flags |= IOSQE_FIXED_FILE; if (send_ring) { sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = bgid; } if (snd_bundle) { sqe->ioprio |= IORING_RECVSEND_BUNDLE; cd->snd_mshot++; } else if (send_ring) cd->snd_mshot++; } /* * Prepare the next send request, if we need to. If one is already pending, * or if we're a sink and we don't need to do sends, then there's nothing * to do. * * Return 1 if another send completion is expected, 0 if not. */ static int prep_next_send(struct io_uring *ring, struct conn *c, struct conn_dir *cd, int fd) { int bid; if (cd->pending_send || is_sink) return 0; if (!cd->out_buffers) return 0; bid = cd->snd_next_bid; if (bid == -1) bid = 0; if (send_ring) { /* * send_ring mode is easy, there's nothing to do but submit * our next send request. That will empty the entire outgoing * queue. 
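		 * (The kernel consumes the outgoing buffer ring in order,
		 * which is why no per-send serialization is needed in this
		 * mode; see the comment above setup_send_ring().)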
*/ submit_send(ring, c, cd, fd, NULL, 0, bid, 0); return 1; } else if (snd_msg) { /* * For sendmsg mode, submit our currently prepared iovec, if * we have one, and swap our iovecs so that any further * receives will start preparing that one. */ struct io_msg *imsg = &cd->io_snd_msg; if (!msg_vec(imsg)->iov_len) return 0; imsg->msg.msg_iov = msg_vec(imsg)->iov; imsg->msg.msg_iovlen = msg_vec(imsg)->iov_len; msg_vec(imsg)->iov_len = 0; imsg->vec_index = !imsg->vec_index; submit_send(ring, c, cd, fd, NULL, 0, bid, 0); return 1; } else { /* * send without send_ring - submit the next available vec, * if any. If this vec is the last one in the current series, * then swap to the next vec. We flag each send with MSG_MORE, * unless this is the last part of the current vec. */ struct io_msg *imsg = &cd->io_snd_msg; struct msg_vec *mvec = msg_vec(imsg); int flags = !snd_zc ? MSG_MORE : 0; struct iovec *iov; if (mvec->iov_len == mvec->cur_iov) return 0; imsg->msg.msg_iov = msg_vec(imsg)->iov; iov = &mvec->iov[mvec->cur_iov]; mvec->cur_iov++; if (mvec->cur_iov == mvec->iov_len) { mvec->iov_len = 0; mvec->cur_iov = 0; imsg->vec_index = !imsg->vec_index; flags = 0; } submit_send(ring, c, cd, fd, iov->iov_base, iov->iov_len, bid, flags); return 1; } } /* * Handling a send with an outgoing send ring. Get the buffers from the * receive side, and add them to the ingoing buffer ring again. */ static int handle_send_ring(struct conn *c, struct conn_dir *cd, int bid, int bytes) { struct conn_buf_ring *in_cbr = &c->in_br; struct conn_buf_ring *out_cbr = &c->out_br; int i = 0; while (bytes) { struct io_uring_buf *buf = &out_cbr->br->bufs[bid]; int this_bytes; void *this_buf; this_bytes = buf->len; if (this_bytes > bytes) this_bytes = bytes; cd->out_bytes += this_bytes; vlog("%d: send: bid=%d, len=%d\n", c->tid, bid, this_bytes); this_buf = in_cbr->buf + bid * buf_size; io_uring_buf_ring_add(in_cbr->br, this_buf, buf_size, bid, br_mask, i); /* * Find the provided buffer that the receive consumed, and * which we then used for the send, and add it back to the * pool so it can get picked by another receive. Once the send * is done, we're done with it. */ bid = (bid + 1) & (nr_bufs - 1); bytes -= this_bytes; i++; } cd->snd_next_bid = bid; io_uring_buf_ring_advance(in_cbr->br, i); if (pending_shutdown(c)) close_cd(c, cd); return i; } /* * sendmsg, or send without a ring. Just add buffers back to the ingoing * ring for receives. */ static int handle_send_buf(struct conn *c, struct conn_dir *cd, int bid, int bytes) { struct conn_buf_ring *in_cbr = &c->in_br; int i = 0; while (bytes) { struct io_uring_buf *buf = &in_cbr->br->bufs[bid]; int this_bytes; this_bytes = bytes; if (this_bytes > buf->len) this_bytes = buf->len; vlog("%d: send: bid=%d, len=%d\n", c->tid, bid, this_bytes); cd->out_bytes += this_bytes; /* each recvmsg mshot package has this overhead */ if (rcv_msg && recv_mshot) cd->out_bytes += sizeof(struct io_uring_recvmsg_out); replenish_buffer(in_cbr, bid, i); bid = (bid + 1) & (nr_bufs - 1); bytes -= this_bytes; i++; } io_uring_buf_ring_advance(in_cbr->br, i); cd->snd_next_bid = bid; return i; } static int __handle_send(struct io_uring *ring, struct conn *c, struct conn_dir *cd, struct io_uring_cqe *cqe) { struct conn_dir *ocd; int bid, nr_packets; if (send_ring) { if (!(cqe->flags & IORING_CQE_F_BUFFER)) { fprintf(stderr, "no buffer in send?! 
%d\n", cqe->res); return 1; } bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT; } else { bid = cqe_to_bid(cqe); } /* * CQE notifications only happen with send/sendmsg zerocopy. They * tell us that the data has been acked, and that hence the buffer * is now free to reuse. Waiting on an ACK for each packet will slow * us down tremendously, so do all of our sends and then wait for * the ACKs to come in. They tend to come in bundles anyway. Once * all acks are done (cd->snd_notif == 0), then fire off the next * receive. */ if (cqe->flags & IORING_CQE_F_NOTIF) { cd->snd_notif--; } else { if (cqe->res && cqe->res < buf_size) cd->snd_shrt++; /* * BIDI will use the same buffer pool and do sends on both CDs, * so can't reliably check. TODO. */ if (!bidi && send_ring && cd->snd_next_bid != -1 && bid != cd->snd_next_bid) { fprintf(stderr, "send bid %d, wanted %d at %lu\n", bid, cd->snd_next_bid, cd->out_bytes); goto out_close; } assert(bid <= nr_bufs); vlog("send: got %d, %lu\n", cqe->res, cd->out_bytes); if (send_ring) nr_packets = handle_send_ring(c, cd, bid, cqe->res); else nr_packets = handle_send_buf(c, cd, bid, cqe->res); if (cd->snd_bucket) cd->snd_bucket[nr_packets]++; cd->out_buffers -= nr_packets; assert(cd->out_buffers >= 0); cd->snd++; } if (!(cqe->flags & IORING_CQE_F_MORE)) { int do_recv_arm; cd->pending_send = 0; /* * send done - see if the current vec has data to submit, and * do so if it does. if it doesn't have data yet, nothing to * do. */ do_recv_arm = !prep_next_send(ring, c, cd, cqe_to_fd(cqe)); ocd = &c->cd[!cd->index]; if (!cd->snd_notif && do_recv_arm && !ocd->pending_recv) { int fd = other_dir_fd(c, cqe_to_fd(cqe)); __submit_receive(ring, c, ocd, fd); } out_close: if (pending_shutdown(c)) close_cd(c, cd); } vlog("%d: pending sends %d\n", c->tid, cd->pending_send); return 0; } static int handle_send(struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); struct conn_dir *cd = cqe_to_conn_dir(c, cqe); return __handle_send(ring, c, cd, cqe); } static int send_error(struct error_handler *err, struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); struct conn_dir *cd = cqe_to_conn_dir(c, cqe); cd->pending_send = 0; /* res can have high bit set */ if (cqe->flags & IORING_CQE_F_NOTIF) return handle_send(ring, cqe); if (cqe->res != -ENOBUFS) return default_error(err, ring, cqe); cd->snd_enobufs++; return 0; } /* * We don't expect to get here, as we marked it with skipping posting a * CQE if it was successful. If it does trigger, than means it fails and * that our close has not been done. Log the shutdown error and issue a new * separate close. */ static int handle_shutdown(struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); struct io_uring_sqe *sqe; int fd = cqe_to_fd(cqe); fprintf(stderr, "Got shutdown notication on fd %d\n", fd); if (!cqe->res) fprintf(stderr, "Unexpected success shutdown CQE\n"); else if (cqe->res < 0) fprintf(stderr, "Shutdown got %s\n", strerror(-cqe->res)); sqe = get_sqe(ring); if (fixed_files) io_uring_prep_close_direct(sqe, fd); else io_uring_prep_close(sqe, fd); encode_userdata(sqe, c, __CLOSE, 0, fd); return 0; } /* * Final stage of a connection, the shutdown and close has finished. Mark * it as disconnected and let the main loop reap it. 
*/ static int handle_close(struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); int fd = cqe_to_fd(cqe); printf("Closed client: id=%d, in_fd=%d, out_fd=%d\n", c->tid, c->in_fd, c->out_fd); if (fd == c->in_fd) c->in_fd = -1; else if (fd == c->out_fd) c->out_fd = -1; if (c->in_fd == -1 && c->out_fd == -1) { c->flags |= CONN_F_DISCONNECTED; pthread_mutex_lock(&thread_lock); __show_stats(c); open_conns--; pthread_mutex_unlock(&thread_lock); free_buffer_rings(ring, c); free_msgs(&c->cd[0]); free_msgs(&c->cd[1]); free(c->cd[0].rcv_bucket); free(c->cd[0].snd_bucket); } return 0; } static int handle_cancel(struct io_uring *ring, struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); int fd = cqe_to_fd(cqe); c->pending_cancels--; vlog("%d: got cancel fd %d, refs %d\n", c->tid, fd, c->pending_cancels); if (!c->pending_cancels) { queue_shutdown_close(ring, c, c->in_fd); if (c->out_fd != -1) queue_shutdown_close(ring, c, c->out_fd); io_uring_submit(ring); } return 0; } static void open_socket(struct conn *c) { if (is_sink) { pthread_mutex_lock(&thread_lock); open_conns++; pthread_mutex_unlock(&thread_lock); submit_receive(&c->ring, c); } else { struct io_uring_sqe *sqe; int domain; if (ipv6) domain = AF_INET6; else domain = AF_INET; /* * If fixed_files is set, proxy will use fixed files for any new * file descriptors it instantiates. Fixd files, or fixed * descriptors, are io_uring private file descriptors. They * cannot be accessed outside of io_uring. io_uring holds a * fixed reference to them, which means that we do not need to * grab per-request references to them. Particularly for * threaded applications, grabbing and dropping file references * for each operation can be costly as the file table is shared. * This generally shows up as fget/fput related overhead in any * workload profiles. * * Fixed descriptors are passed in via the 'fd' field just like * regular descriptors, and then marked as such by setting the * IOSQE_FIXED_FILE flag in the sqe->flags field. Some helpers * do that automatically, like the below, others will need it * set manually if they don't have a *direct*() helper. * * For operations that instantiate them, like the opening of a * direct socket, the application may either ask the kernel to * find a free one (as is done below), or the application may * manage the space itself and pass in an index for a currently * free slot in the table. If the kernel is asked to allocate a * free direct descriptor, note that io_uring does not abide by * the POSIX mandated "lowest free must be returned". It may * return any free descriptor of its choosing. */ sqe = get_sqe(&c->ring); if (fixed_files) io_uring_prep_socket_direct_alloc(sqe, domain, SOCK_STREAM, 0, 0); else io_uring_prep_socket(sqe, domain, SOCK_STREAM, 0, 0); encode_userdata(sqe, c, __SOCK, 0, 0); } } /* * Start of connection, we got our in descriptor. */ static int handle_fd_pass(struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); int fd = cqe_to_fd(cqe); vlog("%d: got fd pass %d\n", c->tid, fd); c->in_fd = fd; open_socket(c); return 0; } static int handle_stop(struct io_uring_cqe *cqe) { struct conn *c = cqe_to_conn(cqe); printf("Client %d: queueing shutdown\n", c->tid); queue_cancel(&c->ring, c); return 0; } /* * Called for each CQE that we receive. Decode the request type that it * came from, and call the appropriate handler. */ static int handle_cqe(struct io_uring *ring, struct io_uring_cqe *cqe) { int ret; /* * Unlikely, but there's an error in this CQE. 
If an error handler * is defined, call it, and that will deal with it. If no error * handler is defined, the opcode handler either doesn't care or will * handle it on its own. */ if (cqe->res < 0) { struct error_handler *err = &error_handlers[cqe_to_op(cqe)]; if (err->error_fn) return err->error_fn(err, ring, cqe); } switch (cqe_to_op(cqe)) { case __ACCEPT: ret = handle_accept(ring, cqe); break; case __SOCK: ret = handle_sock(ring, cqe); break; case __CONNECT: ret = handle_connect(ring, cqe); break; case __RECV: case __RECVMSG: ret = handle_recv(ring, cqe); break; case __SEND: case __SENDMSG: ret = handle_send(ring, cqe); break; case __CANCEL: ret = handle_cancel(ring, cqe); break; case __SHUTDOWN: ret = handle_shutdown(ring, cqe); break; case __CLOSE: ret = handle_close(ring, cqe); break; case __FD_PASS: ret = handle_fd_pass(cqe); break; case __STOP: ret = handle_stop(cqe); break; case __NOP: ret = 0; break; default: fprintf(stderr, "bad user data %lx\n", (long) cqe->user_data); return 1; } return ret; } static void house_keeping(struct io_uring *ring) { static unsigned long last_bytes; unsigned long bytes, elapsed; struct conn *c; int i, j; vlog("House keeping entered\n"); bytes = 0; for (i = 0; i < nr_conns; i++) { c = &conns[i]; for (j = 0; j < 2; j++) { struct conn_dir *cd = &c->cd[j]; bytes += cd->in_bytes + cd->out_bytes; } if (c->flags & CONN_F_DISCONNECTED) { vlog("%d: disconnected\n", i); if (!(c->flags & CONN_F_REAPED)) { void *ret; pthread_join(c->thread, &ret); c->flags |= CONN_F_REAPED; } continue; } if (c->flags & CONN_F_DISCONNECTING) continue; if (should_shutdown(c)) { __close_conn(ring, c); c->flags |= CONN_F_DISCONNECTING; } } elapsed = mtime_since_now(&last_housekeeping); if (bytes && elapsed >= 900) { unsigned long bw; bw = (8 * (bytes - last_bytes) / 1000UL) / elapsed; if (bw) { if (open_conns) printf("Bandwidth (threads=%d): %'luMbit\n", open_conns, bw); gettimeofday(&last_housekeeping, NULL); last_bytes = bytes; } } } /* * Event loop shared between the parent, and the connections. Could be * split in two, as they don't handle the same types of events. For the per * connection loop, 'c' is valid. For the main loop, it's NULL. */ static int __event_loop(struct io_uring *ring, struct conn *c) { struct __kernel_timespec active_ts, idle_ts; int flags; idle_ts.tv_sec = 0; idle_ts.tv_nsec = 100000000LL; active_ts = idle_ts; if (wait_usec > 1000000) { active_ts.tv_sec = wait_usec / 1000000; wait_usec -= active_ts.tv_sec * 1000000; } active_ts.tv_nsec = wait_usec * 1000; gettimeofday(&last_housekeeping, NULL); flags = 0; while (1) { struct __kernel_timespec *ts = &idle_ts; struct io_uring_cqe *cqe; unsigned int head; int ret, i, to_wait; /* * If wait_batch is set higher than 1, then we'll wait on * that amount of CQEs to be posted each loop. If used with * DEFER_TASKRUN, this can provide a substantial reduction * in context switch rate as the task isn't woken until the * requested number of events can be returned. * * Can be used with -t to set a wait_usec timeout as well. * For example, if an application can deal with 250 usec * of wait latencies, it can set -w8 -t250 which will cause * io_uring to return when either 8 events have been received, * or if 250 usec of waiting has passed. * * If we don't have any open connections, wait on just 1 * always. 
*/ to_wait = 1; if (open_conns && !flags) { ts = &active_ts; to_wait = wait_batch; } vlog("Submit and wait for %d\n", to_wait); ret = io_uring_submit_and_wait_timeout(ring, &cqe, to_wait, ts, NULL); if (*ring->cq.koverflow) printf("overflow %u\n", *ring->cq.koverflow); if (*ring->sq.kflags & IORING_SQ_CQ_OVERFLOW) printf("saw overflow\n"); vlog("Submit and wait: %d\n", ret); i = flags = 0; io_uring_for_each_cqe(ring, head, cqe) { if (handle_cqe(ring, cqe)) return 1; flags |= cqe_to_conn(cqe)->flags; ++i; } vlog("Handled %d events\n", i); /* * Advance the CQ ring for seen events when we've processed * all of them in this loop. This can also be done with * io_uring_cqe_seen() in each handler above, which just marks * that single CQE as seen. However, it's more efficient to * mark a batch as seen when we're done with that batch. */ if (i) { io_uring_cq_advance(ring, i); events += i; } event_loops++; if (c) { if (c->flags & CONN_F_DISCONNECTED) break; } else { house_keeping(ring); } } return 0; } /* * Main event loop. Submit our multishot accept request, and then just loop * around handling incoming connections. */ static int parent_loop(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; /* * proxy provides a way to use either multishot receive or not, but * for accept, we always use multishot. A multishot accept request * need only be armed once, and then it'll trigger a completion and * post a CQE whenever a new connection is accepted. No need to do * anything else, unless the multishot accept terminates. This happens * if it encounters an error. Applications should check for * IORING_CQE_F_MORE in cqe->flags - this tells you if more completions * are expected from this request or not. Non-multishot requests never * have this set, whereas multishot requests will always have it set * unless an error occurs. */ sqe = get_sqe(ring); if (fixed_files) io_uring_prep_multishot_accept_direct(sqe, fd, NULL, NULL, 0); else io_uring_prep_multishot_accept(sqe, fd, NULL, NULL, 0); __encode_userdata(sqe, 0, __ACCEPT, 0, fd); return __event_loop(ring, NULL); } static int init_ring(struct io_uring *ring, int nr_files) { struct io_uring_params params; int ret; /* * By default, set us up with a big CQ ring. Not strictly needed * here, but it's very important to never overflow the CQ ring. * Events will not be dropped if this happens, but it does slow * the application down in dealing with overflown events. * * Set SINGLE_ISSUER, which tells the kernel that only one thread * is doing IO submissions. This enables certain optimizations in * the kernel. */ memset(&params, 0, sizeof(params)); params.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_CLAMP; params.flags |= IORING_SETUP_CQSIZE; params.cq_entries = 1024; /* * If use_huge is set, setup the ring with IORING_SETUP_NO_MMAP. This * means that the application allocates the memory for the ring, and * the kernel maps it. The alternative is having the kernel allocate * the memory, and then liburing will mmap it. But we can't really * support huge pages that way. If this fails, then ensure that the * system has huge pages set aside upfront. */ if (use_huge) params.flags |= IORING_SETUP_NO_MMAP; /* * DEFER_TASKRUN decouples async event reaping and retrying from * regular system calls. If this isn't set, then io_uring uses * normal task_work for this. task_work is always being run on any * exit to userspace. Real applications do more than just call IO * related system calls, and hence we can be running this work way * too often.
Using DEFER_TASKRUN defers any task_work running to * when the application enters the kernel anyway to wait on new * events. It's generally the preferred and recommended way to setup * a ring. */ if (defer_tw) { params.flags |= IORING_SETUP_DEFER_TASKRUN; sqpoll = 0; } /* * SQPOLL offloads any request submission and retry operations to a * dedicated thread. This enables an application to do IO without * ever having to enter the kernel itself. The SQPOLL thread will * stay busy as long as there's work to do, and go to sleep if * sq_thread_idle msecs have passed. If it's running, submitting new * IO just needs to make the requests visible to the SQPOLL thread; * it need not enter the kernel. For submission, the application will * only enter the kernel if the SQPOLL thread has been idle long * enough that it has gone to sleep. * * Waiting on events still requires entering the kernel, if none are * available. The application may also use io_uring_peek_cqe() to * check for new events without entering the kernel, as completions * will be continually produced to the CQ ring by the SQPOLL thread * as they occur. */ if (sqpoll) { params.flags |= IORING_SETUP_SQPOLL; params.sq_thread_idle = 1000; defer_tw = 0; } /* * If neither DEFER_TASKRUN nor SQPOLL is used, set COOP_TASKRUN. This * avoids heavy signal based notifications, which can force an * application to enter the kernel and process them as soon as they * occur. */ if (!sqpoll && !defer_tw) params.flags |= IORING_SETUP_COOP_TASKRUN; /* * The SQ ring size need not be larger than any batch of requests * that need to be prepared before submit. Normally in a loop we'd * only need a few, if any, particularly if multishot is used. */ ret = io_uring_queue_init_params(ring_size, ring, &params); if (ret) { fprintf(stderr, "%s\n", strerror(-ret)); return 1; } /* * If ring provided buffers for sends are available and no option was * given either way, default it to on. If it was turned on and the * kernel doesn't support it, turn it off. */ if (params.features & IORING_FEAT_SEND_BUF_SELECT) { if (send_ring == -1) send_ring = 1; } else { if (send_ring == 1) { fprintf(stderr, "Kernel doesn't support ring provided " "buffers for sends, disabled\n"); } send_ring = 0; } if (!send_ring && snd_bundle) { fprintf(stderr, "Can't use send bundle without send_ring\n"); snd_bundle = 0; } if (fixed_files) { /* * If fixed files are used, we need to allocate a fixed file * table upfront where new direct descriptors can be managed. */ ret = io_uring_register_files_sparse(ring, nr_files); if (ret) { fprintf(stderr, "file register: %d\n", ret); return 1; } /* * If fixed files are used, we also register the ring fd. See * comment near io_uring_prep_socket_direct_alloc() further * down. This avoids the fget/fput overhead associated with * the io_uring_enter(2) system call itself, which is used to * submit and wait on events. */ ret = io_uring_register_ring_fd(ring); if (ret != 1) { fprintf(stderr, "ring register: %d\n", ret); return 1; } } if (napi) { struct io_uring_napi n = { .prefer_busy_poll = napi > 1 ?
1 : 0, .busy_poll_to = napi_timeout, }; ret = io_uring_register_napi(ring, &n); if (ret) { fprintf(stderr, "io_uring_register_napi: %d\n", ret); if (ret != -EINVAL) return 1; fprintf(stderr, "NAPI not available, turned off\n"); } } return 0; } static void *thread_main(void *data) { struct conn *c = data; int ret; c->flags |= CONN_F_STARTED; /* we need a max of 4 descriptors for each client */ ret = init_ring(&c->ring, 4); if (ret) goto done; if (setup_buffer_rings(&c->ring, c)) goto done; /* * If we're using fixed files, then we need to wait for the parent * to install the c->in_fd into our direct descriptor table. When * that happens, we'll set things up. If we're not using fixed files, * we can set up the receive or connect now. */ if (!fixed_files) open_socket(c); /* we're ready */ pthread_barrier_wait(&c->startup_barrier); __event_loop(&c->ring, c); done: return NULL; } static void usage(const char *name) { printf("%s:\n", name); printf("\t-m:\t\tUse multishot receive (%d)\n", recv_mshot); printf("\t-d:\t\tUse DEFER_TASKRUN (%d)\n", defer_tw); printf("\t-S:\t\tUse SQPOLL (%d)\n", sqpoll); printf("\t-f:\t\tUse only fixed files (%d)\n", fixed_files); printf("\t-a:\t\tUse huge pages for the ring (%d)\n", use_huge); printf("\t-t:\t\tTimeout for waiting on CQEs (usec) (%d)\n", wait_usec); printf("\t-w:\t\tNumber of CQEs to wait for each loop (%d)\n", wait_batch); printf("\t-B:\t\tUse bi-directional mode (%d)\n", bidi); printf("\t-s:\t\tAct only as a sink (%d)\n", is_sink); printf("\t-q:\t\tRing size to use (%d)\n", ring_size); printf("\t-H:\t\tHost to connect to (%s)\n", host); printf("\t-r:\t\tPort to receive on (%d)\n", receive_port); printf("\t-p:\t\tPort to connect to (%d)\n", send_port); printf("\t-6:\t\tUse IPv6 (%d)\n", ipv6); printf("\t-N:\t\tUse NAPI polling (%d)\n", napi); printf("\t-T:\t\tNAPI timeout (usec) (%d)\n", napi_timeout); printf("\t-b:\t\tSend/receive buf size (%d)\n", buf_size); printf("\t-n:\t\tNumber of provided buffers (pow2) (%d)\n", nr_bufs); printf("\t-u:\t\tUse provided buffers for send (%d)\n", send_ring); printf("\t-C:\t\tUse bundles for send (%d)\n", snd_bundle); printf("\t-z:\t\tUse zerocopy send (%d)\n", snd_zc); printf("\t-c:\t\tUse bundles for recv (%d)\n", rcv_bundle); printf("\t-M:\t\tUse sendmsg (%d)\n", snd_msg); printf("\t-R:\t\tUse recvmsg (%d)\n", rcv_msg); printf("\t-x:\t\tShow extended stats (%d)\n", ext_stat); printf("\t-V:\t\tIncrease verbosity (%d)\n", verbose); } /* * Option parsing and ring / net setup */ int main(int argc, char *argv[]) { struct io_uring ring; struct sigaction sa = { }; const char *optstring; int opt, ret, fd; setlocale(LC_NUMERIC, "en_US"); page_size = sysconf(_SC_PAGESIZE); if (page_size < 0) { perror("sysconf(_SC_PAGESIZE)"); return 1; } pthread_mutex_init(&thread_lock, NULL); optstring = "m:d:S:s:b:f:H:r:p:n:B:N:T:w:t:M:R:u:c:C:q:a:x:z:6Vh?"; while ((opt = getopt(argc, argv, optstring)) != -1) { switch (opt) { case 'm': recv_mshot = !!atoi(optarg); break; case 'S': sqpoll = !!atoi(optarg); break; case 'd': defer_tw = !!atoi(optarg); break; case 'b': buf_size = atoi(optarg); break; case 'n': nr_bufs = atoi(optarg); break; case 'u': send_ring = !!atoi(optarg); break; case 'c': rcv_bundle = !!atoi(optarg); break; case 'C': snd_bundle = !!atoi(optarg); break; case 'w': wait_batch = atoi(optarg); break; case 't': wait_usec = atoi(optarg); break; case 's': is_sink = !!atoi(optarg); break; case 'f': fixed_files = !!atoi(optarg); break; case 'H': host = strdup(optarg); break; case 'r': receive_port = atoi(optarg); break; case
'p': send_port = atoi(optarg); break; case 'B': bidi = !!atoi(optarg); break; case 'N': napi = !!atoi(optarg); break; case 'T': napi_timeout = atoi(optarg); break; case '6': ipv6 = true; break; case 'M': snd_msg = !!atoi(optarg); break; case 'z': snd_zc = !!atoi(optarg); break; case 'R': rcv_msg = !!atoi(optarg); break; case 'q': ring_size = atoi(optarg); break; case 'a': use_huge = !!atoi(optarg); break; case 'x': ext_stat = !!atoi(optarg); break; case 'V': verbose++; break; case 'h': default: usage(argv[0]); return 1; } } if (bidi && is_sink) { fprintf(stderr, "Can't be both bidi proxy and sink\n"); return 1; } if (snd_msg && sqpoll) { fprintf(stderr, "SQPOLL with msg variants disabled\n"); snd_msg = 0; } if (rcv_msg && rcv_bundle) { fprintf(stderr, "Can't use bundles with recvmsg\n"); rcv_msg = 0; } if (snd_msg && snd_bundle) { fprintf(stderr, "Can't use bundles with sendmsg\n"); snd_msg = 0; } if (snd_msg && send_ring) { fprintf(stderr, "Can't use send ring sendmsg\n"); snd_msg = 0; } if (snd_zc && (send_ring || snd_bundle)) { fprintf(stderr, "Can't use send zc with bundles or ring\n"); send_ring = snd_bundle = 0; } /* * For recvmsg w/multishot, we waste some data at the head of the * packet every time. Adjust the buffer size to account for that, * so we're still handing 'buf_size' actual payload of data. */ if (rcv_msg && recv_mshot) { fprintf(stderr, "Adjusted buf size for recvmsg w/multishot\n"); buf_size += sizeof(struct io_uring_recvmsg_out); } br_mask = nr_bufs - 1; fd = setup_listening_socket(receive_port, ipv6); if (is_sink) send_port = -1; if (fd == -1) return 1; atexit(show_stats); sa.sa_handler = sig_int; sa.sa_flags = SA_RESTART; sigaction(SIGINT, &sa, NULL); ret = init_ring(&ring, MAX_CONNS * 3); if (ret) return ret; printf("Backend: sqpoll=%d, defer_tw=%d, fixed_files=%d, " "is_sink=%d, buf_size=%d, nr_bufs=%d, host=%s, send_port=%d, " "receive_port=%d, napi=%d, napi_timeout=%d, huge_page=%d\n", sqpoll, defer_tw, fixed_files, is_sink, buf_size, nr_bufs, host, send_port, receive_port, napi, napi_timeout, use_huge); printf(" recv options: recvmsg=%d, recv_mshot=%d, recv_bundle=%d\n", rcv_msg, recv_mshot, rcv_bundle); printf(" send options: sendmsg=%d, send_ring=%d, send_bundle=%d, " "send_zerocopy=%d\n", snd_msg, send_ring, snd_bundle, snd_zc); return parent_loop(&ring, fd); } liburing-2.6/examples/proxy.h000066400000000000000000000041051461424365000163510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_PROXY_H #define LIBURING_PROXY_H #include /* * Generic opcode agnostic encoding to sqe/cqe->user_data */ struct userdata { union { struct { uint16_t op_tid; /* 4 bits op, 12 bits tid */ uint16_t bid; uint16_t fd; }; uint64_t val; }; }; #define OP_SHIFT (12) #define TID_MASK ((1U << 12) - 1) /* * Packs the information that we will need at completion time into the * sqe->user_data field, which is passed back in the completion in * cqe->user_data. Some apps would need more space than this, and in fact * I'd love to pack the requested IO size in here, and it's not uncommon to * see apps use this field as just a cookie to either index a data structure * at completion time, or even just put the pointer to the associated * structure into this field. 
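* As an illustration (hypothetical values, not taken from the proxy itself): encoding op 2, tid 5, bid 0 and fd 9 stores op_tid = (2 << OP_SHIFT) | 5 == 0x2005, with bid and fd carried in the next two 16-bit fields of the union, so the whole cookie travels as a single 64-bit user_data value.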
*/ static inline void __encode_userdata(struct io_uring_sqe *sqe, int tid, int op, int bid, int fd) { struct userdata ud = { .op_tid = (op << OP_SHIFT) | tid, .bid = bid, .fd = fd }; io_uring_sqe_set_data64(sqe, ud.val); } static inline uint64_t __raw_encode(int tid, int op, int bid, int fd) { struct userdata ud = { .op_tid = (op << OP_SHIFT) | tid, .bid = bid, .fd = fd }; return ud.val; } static inline int cqe_to_op(struct io_uring_cqe *cqe) { struct userdata ud = { .val = cqe->user_data }; return ud.op_tid >> OP_SHIFT; } static inline int cqe_to_bid(struct io_uring_cqe *cqe) { struct userdata ud = { .val = cqe->user_data }; return ud.bid; } static inline int cqe_to_fd(struct io_uring_cqe *cqe) { struct userdata ud = { .val = cqe->user_data }; return ud.fd; } static unsigned long long mtime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000; usec /= 1000; return sec + usec; } static unsigned long long mtime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return mtime_since(tv, &end); } #endif liburing-2.6/examples/rsrc-update-bench.c000066400000000000000000000041401461424365000204700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include #include "liburing.h" static unsigned long runtime_ms = 10000; static unsigned long gettimeofday_ms(void) { struct timeval tv; gettimeofday(&tv, NULL); return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); } int main(void) { unsigned long tstop; unsigned long nr_reqs = 0; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring ring; int pipe1[2]; int ret, i, qd = 32; int table_size = 128; if (pipe(pipe1) != 0) { perror("pipe"); return 1; } ret = io_uring_queue_init(1024, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN); if (ret) { fprintf(stderr, "io_uring_queue_init failed: %d\n", ret); return 1; } ret = io_uring_register_ring_fd(&ring); if (ret < 0) { fprintf(stderr, "io_uring_register_ring_fd failed\n"); return 1; } ret = io_uring_register_files_sparse(&ring, table_size); if (ret < 0) { fprintf(stderr, "io_uring_register_files_sparse failed\n"); return 1; } for (i = 0; i < table_size; i++) { ret = io_uring_register_files_update(&ring, i, pipe1, 1); if (ret < 0) { fprintf(stderr, "io_uring_register_files_update failed\n"); return 1; } } srand(time(NULL)); tstop = gettimeofday_ms() + runtime_ms; do { int off = rand(); for (i = 0; i < qd; i++) { sqe = io_uring_get_sqe(&ring); int roff = (off + i) % table_size; io_uring_prep_files_update(sqe, pipe1, 1, roff); } ret = io_uring_submit(&ring); if (ret != qd) { fprintf(stderr, "child: sqe submit failed: %d\n", ret); return 1; } for (i = 0; i < qd; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "child: wait completion %d\n", ret); break; } io_uring_cqe_seen(&ring, cqe); nr_reqs++; } } while (gettimeofday_ms() < tstop); fprintf(stderr, "max updates/s: %lu\n", nr_reqs * 1000UL / runtime_ms); io_uring_queue_exit(&ring); close(pipe1[0]); close(pipe1[1]); return 0; } liburing-2.6/examples/send-zerocopy.c000066400000000000000000000354041461424365000177720ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */ #include #include #include #include #include #include #include #include #include #include #include #include 
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #define ZC_TAG 0xfffffffULL #define MAX_SUBMIT_NR 512 #define MAX_THREADS 100 struct thread_data { pthread_t thread; void *ret; int idx; unsigned long long packets; unsigned long long bytes; unsigned long long dt_ms; struct sockaddr_storage dst_addr; int fd; }; static bool cfg_reg_ringfd = true; static bool cfg_fixed_files = 1; static bool cfg_zc = 1; static int cfg_nr_reqs = 8; static bool cfg_fixed_buf = 1; static bool cfg_hugetlb = 0; static bool cfg_defer_taskrun = 0; static int cfg_cpu = -1; static bool cfg_rx = 0; static unsigned cfg_nr_threads = 1; static int cfg_family = PF_UNSPEC; static int cfg_type = 0; static int cfg_payload_len; static int cfg_port = 8000; static int cfg_runtime_ms = 4200; static bool cfg_rx_poll = false; static socklen_t cfg_alen; static char *str_addr = NULL; static char payload_buf[IP_MAXPACKET] __attribute__((aligned(4096))); static char *payload; static struct thread_data threads[MAX_THREADS]; static pthread_barrier_t barrier; static bool should_stop = false; static void sigint_handler(__attribute__((__unused__)) int sig) { /* kill if should_stop can't unblock threads fast enough */ if (should_stop) _exit(-1); should_stop = true; } /* * Implementation of error(3), prints an error message and exits. */ static void t_error(int status, int errnum, const char *format, ...) { va_list args; va_start(args, format); vfprintf(stderr, format, args); if (errnum) fprintf(stderr, ": %s", strerror(errnum)); fprintf(stderr, "\n"); va_end(args); exit(status); } static void set_cpu_affinity(void) { cpu_set_t mask; if (cfg_cpu == -1) return; CPU_ZERO(&mask); CPU_SET(cfg_cpu, &mask); if (sched_setaffinity(0, sizeof(mask), &mask)) t_error(1, errno, "unable to pin cpu\n"); } static void set_iowq_affinity(struct io_uring *ring) { cpu_set_t mask; int ret; if (cfg_cpu == -1) return; CPU_ZERO(&mask); CPU_SET(cfg_cpu, &mask); ret = io_uring_register_iowq_aff(ring, 1, &mask); if (ret) t_error(1, ret, "unable to set io-wq affinity\n"); } static unsigned long gettimeofday_ms(void) { struct timeval tv; gettimeofday(&tv, NULL); return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); } static void do_setsockopt(int fd, int level, int optname, int val) { if (setsockopt(fd, level, optname, &val, sizeof(val))) t_error(1, errno, "setsockopt %d.%d: %d", level, optname, val); } static void setup_sockaddr(int domain, const char *str_addr, struct sockaddr_storage *sockaddr) { struct sockaddr_in6 *addr6 = (void *) sockaddr; struct sockaddr_in *addr4 = (void *) sockaddr; int port = cfg_port; switch (domain) { case PF_INET: memset(addr4, 0, sizeof(*addr4)); addr4->sin_family = AF_INET; addr4->sin_port = htons(port); if (str_addr && inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) t_error(1, 0, "ipv4 parse error: %s", str_addr); break; case PF_INET6: memset(addr6, 0, sizeof(*addr6)); addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(port); if (str_addr && inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) t_error(1, 0, "ipv6 parse error: %s", str_addr); break; default: t_error(1, 0, "illegal domain"); } } static int do_poll(int fd, int events) { struct pollfd pfd; int ret; pfd.events = events; pfd.revents = 0; pfd.fd = fd; ret = poll(&pfd, 1, -1); if (ret == -1) t_error(1, errno, "poll"); return ret && (pfd.revents & events);
} /* Flush all outstanding bytes for the tcp receive queue */ static int do_flush_tcp(struct thread_data *td, int fd) { int ret; /* MSG_TRUNC flushes up to len bytes */ ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT); if (ret == -1 && errno == EAGAIN) return 0; if (ret == -1) t_error(1, errno, "flush"); if (!ret) return 1; td->packets++; td->bytes += ret; return 0; } /* Flush all outstanding datagrams. Verify first few bytes of each. */ static int do_flush_datagram(struct thread_data *td, int fd) { long ret, off = 0; char buf[64]; /* MSG_TRUNC will return full datagram length */ ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC); if (ret == -1 && errno == EAGAIN) return 0; if (ret == -1) t_error(1, errno, "recv"); if (ret != cfg_payload_len) t_error(1, 0, "recv: ret=%ld != %d", ret, cfg_payload_len); if ((unsigned long) ret > sizeof(buf) - off) ret = sizeof(buf) - off; if (memcmp(buf + off, payload, ret)) t_error(1, 0, "recv: data mismatch"); td->packets++; td->bytes += cfg_payload_len; return 0; } static void do_setup_rx(int domain, int type, int protocol) { struct sockaddr_storage addr = {}; struct thread_data *td; int listen_fd, fd; unsigned int i; fd = socket(domain, type, protocol); if (fd == -1) t_error(1, errno, "socket r"); do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1); setup_sockaddr(cfg_family, str_addr, &addr); if (bind(fd, (void *)&addr, cfg_alen)) t_error(1, errno, "bind"); if (type != SOCK_STREAM) { if (cfg_nr_threads != 1) t_error(1, 0, "udp rx can't multithread"); threads[0].fd = fd; return; } listen_fd = fd; if (listen(listen_fd, cfg_nr_threads)) t_error(1, errno, "listen"); for (i = 0; i < cfg_nr_threads; i++) { td = &threads[i]; fd = accept(listen_fd, NULL, NULL); if (fd == -1) t_error(1, errno, "accept"); td->fd = fd; } if (close(listen_fd)) t_error(1, errno, "close listen sock"); } static void *do_rx(void *arg) { struct thread_data *td = arg; const int cfg_receiver_wait_ms = 400; uint64_t tstop; int ret, fd = td->fd; tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms; do { if (cfg_type == SOCK_STREAM) ret = do_flush_tcp(td, fd); else ret = do_flush_datagram(td, fd); if (ret) break; do_poll(fd, POLLIN); } while (gettimeofday_ms() < tstop); if (close(fd)) t_error(1, errno, "close"); pthread_exit(&td->ret); return NULL; } static inline struct io_uring_cqe *wait_cqe_fast(struct io_uring *ring) { struct io_uring_cqe *cqe; unsigned head; int ret; io_uring_for_each_cqe(ring, head, cqe) return cqe; ret = io_uring_wait_cqe(ring, &cqe); if (ret) t_error(1, ret, "wait cqe"); return cqe; } static void do_tx(struct thread_data *td, int domain, int type, int protocol) { const int notif_slack = 128; struct io_uring ring; struct iovec iov; uint64_t tstart; int i, fd, ret; int compl_cqes = 0; int ring_flags = IORING_SETUP_COOP_TASKRUN | IORING_SETUP_SINGLE_ISSUER; unsigned loop = 0; if (cfg_defer_taskrun) ring_flags |= IORING_SETUP_DEFER_TASKRUN; fd = socket(domain, type, protocol); if (fd == -1) t_error(1, errno, "socket t"); if (connect(fd, (void *)&td->dst_addr, cfg_alen)) t_error(1, errno, "connect, idx %i", td->idx); ret = io_uring_queue_init(512, &ring, ring_flags); if (ret) t_error(1, ret, "io_uring: queue init"); set_cpu_affinity(); set_iowq_affinity(&ring); if (cfg_fixed_files) { ret = io_uring_register_files(&ring, &fd, 1); if (ret < 0) t_error(1, ret, "io_uring: files registration"); } if (cfg_reg_ringfd) { ret = io_uring_register_ring_fd(&ring); if (ret < 0) t_error(1, ret, "io_uring: io_uring_register_ring_fd"); } iov.iov_base = payload;
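/*
 * Register the payload as a fixed buffer up front. When a zerocopy send
 * is issued with IORING_RECVSEND_FIXED_BUF set (see the submission loop
 * below), the request refers to this registered buffer by sqe->buf_index
 * instead of having the kernel map the user pages on every send.
 */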
iov.iov_len = cfg_payload_len; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) t_error(1, ret, "io_uring: buffer registration"); if (cfg_rx_poll) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, fd, POLLIN); ret = io_uring_submit(&ring); if (ret != 1) t_error(1, ret, "submit poll"); } pthread_barrier_wait(&barrier); tstart = gettimeofday_ms(); do { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; unsigned buf_idx = 0; unsigned msg_flags = MSG_WAITALL; for (i = 0; i < cfg_nr_reqs; i++) { sqe = io_uring_get_sqe(&ring); if (!cfg_zc) io_uring_prep_send(sqe, fd, payload, cfg_payload_len, 0); else { io_uring_prep_send_zc(sqe, fd, payload, cfg_payload_len, msg_flags, 0); if (cfg_fixed_buf) { sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; sqe->buf_index = buf_idx; } } sqe->user_data = 1; if (cfg_fixed_files) { sqe->fd = 0; sqe->flags |= IOSQE_FIXED_FILE; } } if (cfg_defer_taskrun && compl_cqes >= notif_slack) ret = io_uring_submit_and_get_events(&ring); else ret = io_uring_submit(&ring); if (ret != cfg_nr_reqs) t_error(1, ret, "submit"); for (i = 0; i < cfg_nr_reqs; i++) { cqe = wait_cqe_fast(&ring); if (cqe->flags & IORING_CQE_F_NOTIF) { if (cqe->flags & IORING_CQE_F_MORE) t_error(1, -EINVAL, "F_MORE notif"); compl_cqes--; i--; io_uring_cqe_seen(&ring, cqe); continue; } if (cqe->flags & IORING_CQE_F_MORE) compl_cqes++; if (cqe->res >= 0) { td->packets++; td->bytes += cqe->res; } else if (cqe->res == -ECONNREFUSED || cqe->res == -EPIPE || cqe->res == -ECONNRESET) { fprintf(stderr, "Connection failure\n"); goto out_fail; } else if (cqe->res != -EAGAIN) { t_error(1, cqe->res, "send failed"); } io_uring_cqe_seen(&ring, cqe); } if (should_stop) break; } while ((++loop % 16 != 0) || gettimeofday_ms() < tstart + cfg_runtime_ms); td->dt_ms = gettimeofday_ms() - tstart; out_fail: shutdown(fd, SHUT_RDWR); if (close(fd)) t_error(1, errno, "close"); while (compl_cqes) { struct io_uring_cqe *cqe = wait_cqe_fast(&ring); io_uring_cqe_seen(&ring, cqe); compl_cqes--; } io_uring_queue_exit(&ring); } static void *do_test(void *arg) { struct thread_data *td = arg; int protocol = 0; setup_sockaddr(cfg_family, str_addr, &td->dst_addr); do_tx(td, cfg_family, cfg_type, protocol); pthread_exit(&td->ret); return NULL; } static void usage(const char *filepath) { printf("Usage:\t%s -D [options]\n", filepath); printf("\t%s -R [options]\n\n", filepath); printf(" -4\t\tUse IPv4\n"); printf(" -6\t\tUse IPv6\n"); printf(" -D
\tDestination address\n"); printf(" -p \tServer port to listen on/connect to\n"); printf(" -s \tBytes per request\n"); printf(" -s \tBytes per request\n"); printf(" -n \tNumber of parallel requests\n"); printf(" -z \tZerocopy mode, 0 to disable, enabled otherwise\n"); printf(" -b \tUse registered buffers\n"); printf(" -l \tUse huge pages\n"); printf(" -d\t\tUse defer taskrun\n"); printf(" -C \tPin to the specified CPU\n"); printf(" -T \tNumber of threads to use for sending\n"); printf(" -R\t\tPlay the server role\n"); printf(" -t \tTime in seconds\n"); } static void parse_opts(int argc, char **argv) { const int max_payload_len = IP_MAXPACKET - sizeof(struct ipv6hdr) - sizeof(struct tcphdr) - 40 /* max tcp options */; int c; char *daddr = NULL; if (argc <= 1) { usage(argv[0]); exit(0); } cfg_payload_len = max_payload_len; while ((c = getopt(argc, argv, "46D:p:s:t:n:z:b:l:dC:T:Ry")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) t_error(1, 0, "Pass one of -4 or -6"); cfg_family = PF_INET; cfg_alen = sizeof(struct sockaddr_in); break; case '6': if (cfg_family != PF_UNSPEC) t_error(1, 0, "Pass one of -4 or -6"); cfg_family = PF_INET6; cfg_alen = sizeof(struct sockaddr_in6); break; case 'D': daddr = optarg; break; case 'p': cfg_port = strtoul(optarg, NULL, 0); break; case 's': cfg_payload_len = strtoul(optarg, NULL, 0); break; case 't': cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; break; case 'n': cfg_nr_reqs = strtoul(optarg, NULL, 0); break; case 'z': cfg_zc = strtoul(optarg, NULL, 0); break; case 'b': cfg_fixed_buf = strtoul(optarg, NULL, 0); break; case 'l': cfg_hugetlb = strtoul(optarg, NULL, 0); break; case 'd': cfg_defer_taskrun = 1; break; case 'C': cfg_cpu = strtol(optarg, NULL, 0); break; case 'T': cfg_nr_threads = strtol(optarg, NULL, 0); if (cfg_nr_threads > MAX_THREADS) t_error(1, 0, "too many threads\n"); break; case 'R': cfg_rx = 1; break; case 'y': cfg_rx_poll = 1; break; } } if (cfg_nr_reqs > MAX_SUBMIT_NR) t_error(1, 0, "-n: submit batch nr exceeds max (%d)", MAX_SUBMIT_NR); if (cfg_payload_len > max_payload_len) t_error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); str_addr = daddr; if (optind != argc - 1) usage(argv[0]); } int main(int argc, char **argv) { unsigned long long tsum = 0; unsigned long long packets = 0, bytes = 0; struct thread_data *td; const char *cfg_test; unsigned int i; void *res; parse_opts(argc, argv); set_cpu_affinity(); payload = payload_buf; if (cfg_hugetlb) { payload = mmap(NULL, 2*1024*1024, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS, -1, 0); if (payload == MAP_FAILED) { fprintf(stderr, "hugetlb alloc failed\n"); return 1; } } cfg_test = argv[argc - 1]; if (!strcmp(cfg_test, "tcp")) cfg_type = SOCK_STREAM; else if (!strcmp(cfg_test, "udp")) cfg_type = SOCK_DGRAM; else t_error(1, 0, "unknown cfg_test %s", cfg_test); pthread_barrier_init(&barrier, NULL, cfg_nr_threads); for (i = 0; i < IP_MAXPACKET; i++) payload[i] = 'a' + (i % 26); for (i = 0; i < cfg_nr_threads; i++) { td = &threads[i]; td->idx = i; } if (cfg_rx) do_setup_rx(cfg_family, cfg_type, 0); if (!cfg_rx) signal(SIGINT, sigint_handler); for (i = 0; i < cfg_nr_threads; i++) pthread_create(&threads[i].thread, NULL, !cfg_rx ? 
do_test : do_rx, &threads[i]); for (i = 0; i < cfg_nr_threads; i++) { td = &threads[i]; pthread_join(td->thread, &res); packets += td->packets; bytes += td->bytes; tsum += td->dt_ms; } tsum = tsum / cfg_nr_threads; if (!tsum) { printf("The run is too short, can't gather stats\n"); } else { printf("packets=%llu (MB=%llu), rps=%llu (MB/s=%llu)\n", packets, bytes >> 20, packets * 1000 / tsum, (bytes >> 20) * 1000 / tsum); } pthread_barrier_destroy(&barrier); return 0; } liburing-2.6/examples/ucontext-cp.c000066400000000000000000000130311461424365000174320ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * gcc -Wall -O2 -D_GNU_SOURCE -o ucontext-cp ucontext-cp.c -luring */ #define _POSIX_C_SOURCE 199309L #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #define QD 64 #define BS 1024 #ifndef SIGSTKSZ #define SIGSTKSZ 8192 #endif typedef struct { struct io_uring *ring; unsigned char *stack_buf; ucontext_t ctx_main, ctx_fnew; } async_context; typedef struct { async_context *pctx; int *psuccess; int *pfailure; int infd; int outfd; } arguments_bundle; #define DEFINE_AWAIT_OP(operation) \ static ssize_t await_##operation( \ async_context *pctx, \ int fd, \ const struct iovec *ioves, \ unsigned int nr_vecs, \ off_t offset) \ { \ struct io_uring_sqe *sqe = io_uring_get_sqe(pctx->ring); \ struct io_uring_cqe *cqe; \ \ if (!sqe) \ return -1; \ \ io_uring_prep_##operation(sqe, fd, ioves, nr_vecs, offset); \ io_uring_sqe_set_data(sqe, pctx); \ swapcontext(&pctx->ctx_fnew, &pctx->ctx_main); \ io_uring_peek_cqe(pctx->ring, &cqe); \ assert(cqe); \ io_uring_cqe_seen(pctx->ring, cqe); \ \ return cqe->res; \ } DEFINE_AWAIT_OP(readv) DEFINE_AWAIT_OP(writev) #undef DEFINE_AWAIT_OP static int await_delay(async_context *pctx, time_t seconds) { struct io_uring_sqe *sqe = io_uring_get_sqe(pctx->ring); struct io_uring_cqe *cqe; struct __kernel_timespec ts = { .tv_sec = seconds, .tv_nsec = 0 }; if (!sqe) return -1; io_uring_prep_timeout(sqe, &ts, 0, 0); io_uring_sqe_set_data(sqe, pctx); swapcontext(&pctx->ctx_fnew, &pctx->ctx_main); io_uring_peek_cqe(pctx->ring, &cqe); assert(cqe); io_uring_cqe_seen(pctx->ring, cqe); return 0; } static int setup_context(async_context *pctx, struct io_uring *ring) { int ret; pctx->ring = ring; ret = getcontext(&pctx->ctx_fnew); if (ret < 0) { perror("getcontext"); return -1; } pctx->stack_buf = malloc(SIGSTKSZ); if (!pctx->stack_buf) { perror("malloc"); return -1; } pctx->ctx_fnew.uc_stack.ss_sp = pctx->stack_buf; pctx->ctx_fnew.uc_stack.ss_size = SIGSTKSZ; pctx->ctx_fnew.uc_link = &pctx->ctx_main; return 0; } static int copy_file(async_context *pctx, int infd, int outfd, struct iovec* piov) { off_t offset = 0; for (;;) { ssize_t bytes_read; printf("%d->%d: readv %ld bytes from %ld\n", infd, outfd, (long) piov->iov_len, (long) offset); if ((bytes_read = await_readv(pctx, infd, piov, 1, offset)) < 0) { perror("await_readv"); return 1; } if (bytes_read == 0) return 0; piov->iov_len = bytes_read; printf("%d->%d: writev %ld bytes from %ld\n", infd, outfd, (long) piov->iov_len, (long) offset); if (await_writev(pctx, outfd, piov, 1, offset) != bytes_read) { perror("await_writev"); return 1; } if (bytes_read < BS) return 0; offset += bytes_read; printf("%d->%d: wait %ds\n", infd, outfd, 1); await_delay(pctx, 1); } } static void copy_file_wrapper(arguments_bundle *pbundle) { struct iovec iov = { .iov_base = malloc(BS), .iov_len = BS, }; async_context *pctx = pbundle->pctx; int ret 
= copy_file(pctx, pbundle->infd, pbundle->outfd, &iov); printf("%d->%d: done with ret code %d\n", pbundle->infd, pbundle->outfd, ret); if (ret == 0) { ++*pbundle->psuccess; } else { ++*pbundle->pfailure; } free(iov.iov_base); close(pbundle->infd); close(pbundle->outfd); free(pbundle->pctx->stack_buf); free(pbundle->pctx); free(pbundle); swapcontext(&pctx->ctx_fnew, &pctx->ctx_main); } int main(int argc, char *argv[]) { struct io_uring ring; int i, req_count, ret; int success = 0, failure = 0; if (argc < 3) { fprintf(stderr, "%s: infile1 outfile1 [infile2 outfile2 [...]]\n", argv[0]); return 1; } ret = io_uring_queue_init(QD, &ring, 0); if (ret < 0) { fprintf(stderr, "queue_init: %s\n", strerror(-ret)); return -1; } req_count = (argc - 1) / 2; printf("copying %d files...\n", req_count); for (i = 1; i < argc; i += 2) { int infd, outfd; async_context *pctx = malloc(sizeof(*pctx)); if (!pctx || setup_context(pctx, &ring)) return 1; infd = open(argv[i], O_RDONLY); if (infd < 0) { perror("open infile"); return 1; } outfd = open(argv[i + 1], O_WRONLY | O_CREAT | O_TRUNC, 0644); if (outfd < 0) { perror("open outfile"); return 1; } arguments_bundle *pbundle = malloc(sizeof(*pbundle)); pbundle->pctx = pctx; pbundle->psuccess = &success; pbundle->pfailure = &failure; pbundle->infd = infd; pbundle->outfd = outfd; makecontext(&pctx->ctx_fnew, (void (*)(void)) copy_file_wrapper, 1, pbundle); if (swapcontext(&pctx->ctx_main, &pctx->ctx_fnew)) { perror("swapcontext"); return 1; } } /* event loop */ while (success + failure < req_count) { struct io_uring_cqe *cqe; /* usually be timed waiting */ ret = io_uring_submit_and_wait(&ring, 1); if (ret < 0) { fprintf(stderr, "submit_and_wait: %s\n", strerror(-ret)); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "wait_cqe: %s\n", strerror(-ret)); return 1; } async_context *pctx = io_uring_cqe_get_data(cqe); if (swapcontext(&pctx->ctx_main, &pctx->ctx_fnew)) { perror("swapcontext"); return 1; } } io_uring_queue_exit(&ring); printf("finished with %d success(es) and %d failure(s)\n", success, failure); return failure > 0; } liburing-2.6/liburing-ffi.pc.in000066400000000000000000000003621461424365000165100ustar00rootroot00000000000000prefix=@prefix@ exec_prefix=${prefix} libdir=@libdir@ includedir=@includedir@ Name: @NAME@ Version: @VERSION@ Description: io_uring FFI library URL: https://git.kernel.dk/cgit/liburing/ Libs: -L${libdir} -luring-ffi Cflags: -I${includedir} liburing-2.6/liburing.pc.in000066400000000000000000000003521461424365000157450ustar00rootroot00000000000000prefix=@prefix@ exec_prefix=${prefix} libdir=@libdir@ includedir=@includedir@ Name: @NAME@ Version: @VERSION@ Description: io_uring library URL: https://git.kernel.dk/cgit/liburing/ Libs: -L${libdir} -luring Cflags: -I${includedir} liburing-2.6/liburing.spec000066400000000000000000000033171461424365000156740ustar00rootroot00000000000000Name: liburing Version: 2.6 Release: 1%{?dist} Summary: Linux-native io_uring I/O access library License: (GPLv2 with exceptions and LGPLv2+) or MIT Source0: https://brick.kernel.dk/snaps/%{name}-%{version}.tar.gz Source1: https://brick.kernel.dk/snaps/%{name}-%{version}.tar.gz.asc URL: https://git.kernel.dk/cgit/liburing/ BuildRequires: gcc BuildRequires: make %description Provides native async IO for the Linux kernel, in a fast and efficient manner, for both buffered and O_DIRECT. 
%package devel Summary: Development files for Linux-native io_uring I/O access library Requires: %{name}%{_isa} = %{version}-%{release} Requires: pkgconfig %description devel This package provides header files to include and libraries to link with for the Linux-native io_uring. %prep %autosetup %build %set_build_flags ./configure --prefix=%{_prefix} --libdir=%{_libdir} --libdevdir=%{_libdir} --mandir=%{_mandir} --includedir=%{_includedir} %make_build %install %make_install %files %attr(0755,root,root) %{_libdir}/liburing.so.* %license COPYING %files devel %{_includedir}/liburing/ %{_includedir}/liburing.h %{_libdir}/liburing.so %exclude %{_libdir}/liburing.a %{_libdir}/pkgconfig/* %{_mandir}/man2/* %{_mandir}/man3/* %{_mandir}/man7/* %changelog * Thu Oct 31 2019 Jeff Moyer - 0.2-1 - Add io_uring_cq_ready() - Add io_uring_peek_batch_cqe() - Add io_uring_prep_accept() - Add io_uring_prep_{recv,send}msg() - Add io_uring_prep_timeout_remove() - Add io_uring_queue_init_params() - Add io_uring_register_files_update() - Add io_uring_sq_space_left() - Add io_uring_wait_cqe_timeout() - Add io_uring_wait_cqes() - Add io_uring_wait_cqes_timeout() * Tue Jan 8 2019 Jens Axboe - 0.1 - Initial version liburing-2.6/make-debs.sh000077500000000000000000000032751461424365000153770ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright (C) 2019 Liu Changcheng # Author: Liu Changcheng # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
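#
# Usage: ./make-debs.sh [release-base-dir]
# The staging tree is created under the given directory, which defaults
# to /tmp/release (see the 'base' assignment below).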
# set -xe # Create dir for build base=${1:-/tmp/release} distro=unstable releasedir=$base/$(lsb_release -si)/liburing rm -rf $releasedir mkdir -p $releasedir HEAD=$(which head) DCH=$(which dch) src_dir=$(readlink -e `basename $0`) liburing_dir=$(dirname $src_dir) basename=$(basename $liburing_dir) dirname=$(dirname $liburing_dir) version=$(git describe --match "lib*" | cut -d '-' -f 2) outfile="liburing-$version" orgfile=$(echo $outfile | tr '-' '_') # Prepare source code cp -arf ${dirname}/${basename} ${releasedir}/${outfile} cd ${releasedir}/${outfile} git clean -dxf # Change changelog if it's needed cur_ver=`$HEAD < debian/changelog | sed -n -e 's/.* (\(.*\)) .*/\1/p'` if [ "$cur_ver" != "$version-1" ]; then $DCH -D $distro --force-distribution -b -v "$version-1" "new version" fi # Create tar archive cd ../ tar cvzf ${outfile}.tar.gz ${outfile} ln -s ${outfile}.tar.gz ${orgfile}.orig.tar.gz # Build debian package cd - debuild liburing-2.6/man/000077500000000000000000000000001461424365000137545ustar00rootroot00000000000000liburing-2.6/man/IO_URING_CHECK_VERSION.3000077700000000000000000000000001461424365000242112io_uring_check_version.3ustar00rootroot00000000000000liburing-2.6/man/IO_URING_VERSION_MAJOR.3000077700000000000000000000000001461424365000242442io_uring_check_version.3ustar00rootroot00000000000000liburing-2.6/man/IO_URING_VERSION_MINOR.3000077700000000000000000000000001461424365000242602io_uring_check_version.3ustar00rootroot00000000000000liburing-2.6/man/__io_uring_buf_ring_cq_advance.3000077700000000000000000000000001461424365000301012io_uring_buf_ring_cq_advance.3ustar00rootroot00000000000000liburing-2.6/man/io_uring.7000066400000000000000000000607261461424365000156720ustar00rootroot00000000000000.\" Copyright (C) 2020 Shuveb Hussain .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring 7 2020-07-26 "Linux" "Linux Programmer's Manual" .SH NAME io_uring \- Asynchronous I/O facility .SH SYNOPSIS .nf .B "#include " .fi .PP .SH DESCRIPTION .PP .B io_uring is a Linux-specific API for asynchronous I/O. It allows the user to submit one or more I/O requests, which are processed asynchronously without blocking the calling process. .B io_uring gets its name from ring buffers which are shared between user space and kernel space. This arrangement allows for efficient I/O, while avoiding the overhead of copying buffers between them, where possible. This interface makes .B io_uring different from other UNIX I/O APIs, wherein, rather than just communicate between kernel and user space with system calls, ring buffers are used as the main mode of communication. This arrangement has various performance benefits which are discussed in a separate section below. This man page uses the terms shared buffers, shared ring buffers and queues interchangeably. .PP The general programming model you need to follow for .B io_uring is outlined below .IP \(bu Set up shared buffers with .BR io_uring_setup (2) and .BR mmap (2), mapping into user space shared buffers for the submission queue (SQ) and the completion queue (CQ). You place I/O requests you want to make on the SQ, while the kernel places the results of those operations on the CQ. .IP \(bu For every I/O request you need to make (like to read a file, write a file, accept a socket connection, etc), you create a submission queue entry, or SQE, describe the I/O operation you need to get done and add it to the tail of the submission queue (SQ). 
Each I/O operation is, in essence, the equivalent of a system call you would have made otherwise, if you were not using .BR io_uring . You can add more than one SQE to the queue depending on the number of operations you want to request. .IP \(bu After you add one or more SQEs, you need to call .BR io_uring_enter (2) to tell the kernel to dequeue your I/O requests off the SQ and begin processing them. .IP \(bu For each SQE you submit, once it is done processing the request, the kernel places a completion queue event or CQE at the tail of the completion queue or CQ. The kernel places exactly one matching CQE in the CQ for every SQE you submit on the SQ. After you retrieve a CQE, minimally, you might be interested in checking the .I res field of the CQE structure, which corresponds to the return value of the system call's equivalent, had you used it directly without using .BR io_uring . For instance, a read operation under .BR io_uring , started with the .BR IORING_OP_READ operation, issues the equivalent of the .BR read (2) system call. In practice, it mixes the semantics of .BR pread (2) and .BR preadv2 (2) in that it takes an explicit offset, and supports using -1 for the offset to indicate that the current file position should be used instead of passing in an explicit offset. See the opcode documentation for more details. Given that io_uring is an async interface, .I errno is never used for passing back error information. Instead, .I res will contain what the equivalent system call would have returned in case of success, and in case of error .I res will contain .I -errno . For example, if the normal read system call would have returned -1 and set .I errno to .B EINVAL , then .I res would contain .B -EINVAL . If the normal system call would have returned a read size of 1024, then .I res would contain 1024. .IP \(bu Optionally, .BR io_uring_enter (2) can also wait for a specified number of requests to be processed by the kernel before it returns. If you specified a certain number of completions to wait for, the kernel will have placed at least that many CQEs on the CQ, which you can then readily read, right after the return from .BR io_uring_enter (2). .IP \(bu It is important to remember that I/O requests submitted to the kernel can complete in any order. It is not necessary for the kernel to process one request after another, in the order you placed them. Given that the interface is a ring, the requests are attempted in order; however, that doesn't imply any sort of ordering on their completion. When more than one request is in flight, it is not possible to determine which one will complete first. When you dequeue CQEs off the CQ, you should always check which submitted request it corresponds to. The most common method for doing so is utilizing the .I user_data field in the request, which is passed back on the completion side. .PP Adding to and reading from the queues: .IP \(bu You add SQEs to the tail of the SQ. The kernel reads SQEs off the head of the queue. .IP \(bu The kernel adds CQEs to the tail of the CQ. You read CQEs off the head of the queue. .SS Submission queue polling One of the goals of .B io_uring is to provide a means for efficient I/O. To this end, .B io_uring supports a polling mode that lets you avoid the call to .BR io_uring_enter (2), which you use to inform the kernel that you have queued SQEs on to the SQ. With SQ Polling, .B io_uring starts a kernel thread that polls the submission queue for any I/O requests you submit by adding SQEs.
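.PP
A ring that uses SQ polling can be requested at setup time by setting the
.B IORING_SETUP_SQPOLL
flag in the
.I io_uring_params
structure, as in this minimal sketch (error handling and the mmap steps are omitted; older kernels may also require elevated privileges for SQPOLL):
.PP
.in +4n
.EX
struct io_uring_params p;
int ring_fd;

memset(&p, 0, sizeof(p));
p.flags = IORING_SETUP_SQPOLL;
p.sq_thread_idle = 2000;   /* poll thread may sleep after 2s idle */
ring_fd = io_uring_setup(8, &p);
.EE
.in
.PP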
With SQ Polling enabled, there is no need for you to call .BR io_uring_enter (2), letting you avoid the overhead of system calls. A designated kernel thread dequeues SQEs off the SQ as you add them and dispatches them for asynchronous processing. .SS Setting up io_uring .PP The main steps in setting up .B io_uring consist of mapping in the shared buffers with .BR mmap (2) calls. In the example program included in this man page, the function .BR app_setup_uring () sets up .B io_uring with a QUEUE_DEPTH deep submission queue. Pay attention to the 2 .BR mmap (2) calls that set up the shared submission and completion queues. If your kernel is older than version 5.4, three .BR mmap(2) calls are required. .PP .SS Submitting I/O requests The process of submitting a request consists of describing the I/O operation you need to get done using an .B io_uring_sqe structure instance. These details describe the equivalent system call and its parameters. Because the range of I/O operations Linux supports are very varied and the .B io_uring_sqe structure needs to be able to describe them, it has several fields, some packed into unions for space efficiency. Here is a simplified version of struct .B io_uring_sqe with some of the most often used fields: .PP .in +4n .EX struct io_uring_sqe { __u8 opcode; /* type of operation for this sqe */ __s32 fd; /* file descriptor to do IO on */ __u64 off; /* offset into file */ __u64 addr; /* pointer to buffer or iovecs */ __u32 len; /* buffer size or number of iovecs */ __u64 user_data; /* data to be passed back at completion time */ __u8 flags; /* IOSQE_ flags */ ... }; .EE .in Here is struct .B io_uring_sqe in full: .in +4n .EX struct io_uring_sqe { __u8 opcode; /* type of operation for this sqe */ __u8 flags; /* IOSQE_ flags */ __u16 ioprio; /* ioprio for the request */ __s32 fd; /* file descriptor to do IO on */ union { __u64 off; /* offset into file */ __u64 addr2; }; union { __u64 addr; /* pointer to buffer or iovecs */ __u64 splice_off_in; }; __u32 len; /* buffer size or number of iovecs */ union { __kernel_rwf_t rw_flags; __u32 fsync_flags; __u16 poll_events; /* compatibility */ __u32 poll32_events; /* word-reversed for BE */ __u32 sync_range_flags; __u32 msg_flags; __u32 timeout_flags; __u32 accept_flags; __u32 cancel_flags; __u32 open_flags; __u32 statx_flags; __u32 fadvise_advice; __u32 splice_flags; }; __u64 user_data; /* data to be passed back at completion time */ union { struct { /* pack this to avoid bogus arm OABI complaints */ union { /* index into fixed buffers, if used */ __u16 buf_index; /* for grouped buffer selection */ __u16 buf_group; } __attribute__((packed)); /* personality to use, if used */ __u16 personality; __s32 splice_fd_in; }; __u64 __pad2[3]; }; }; .EE .in .PP To submit an I/O request to .BR io_uring , you need to acquire a submission queue entry (SQE) from the submission queue (SQ), fill it up with details of the operation you want to submit and call .BR io_uring_enter (2). There are helper functions of the form io_uring_prep_X to enable proper setup of the SQE. If you want to avoid calling .BR io_uring_enter (2), you have the option of setting up Submission Queue Polling. .PP SQEs are added to the tail of the submission queue. The kernel picks up SQEs off the head of the SQ. The general algorithm to get the next available SQE and update the tail is as follows. 
.PP .in +4n .EX struct io_uring_sqe *sqe; unsigned tail, index; tail = *sqring->tail; index = tail & (*sqring->ring_mask); sqe = &sqring->sqes[index]; /* fill up details about this I/O request */ describe_io(sqe); /* fill the sqe index into the SQ ring array */ sqring->array[index] = index; tail++; atomic_store_explicit(sqring->tail, tail, memory_order_release); .EE .in .PP To get the index of an entry, the application must mask the current tail index with the size mask of the ring. This holds true for both SQs and CQs. Once the SQE is acquired, the necessary fields are filled in, describing the request. While the CQ ring directly indexes the shared array of CQEs, the submission side has an indirection array between them. The submission side ring buffer is an index into this array, which in turn contains the index into the SQEs. .PP The following code snippet demonstrates how a read operation, an equivalent of a .BR preadv2 (2) system call is described by filling up an SQE with the necessary parameters. .PP .in +4n .EX struct iovec iovecs[16]; ... sqe->opcode = IORING_OP_READV; sqe->fd = fd; sqe->addr = (unsigned long) iovecs; sqe->len = 16; sqe->off = offset; sqe->flags = 0; .EE .in .TP .B Memory ordering Modern compilers and CPUs freely reorder reads and writes without affecting the program's outcome to optimize performance. Some aspects of this need to be kept in mind on SMP systems since .B io_uring involves buffers shared between kernel and user space. These buffers are both visible and modifiable from kernel and user space. As heads and tails belonging to these shared buffers are updated by kernel and user space, changes need to be coherently visible on either side, irrespective of whether a CPU switch took place after the kernel-user mode switch happened. We use memory barriers to enforce this coherency. Being significantly large subjects on their own, memory barriers are out of scope for further discussion on this man page. .TP .B Letting the kernel know about I/O submissions Once you place one or more SQEs on to the SQ, you need to let the kernel know that you've done so. You can do this by calling the .BR io_uring_enter (2) system call. This system call is also capable of waiting for a specified count of events to complete. This way, you can be sure to find completion events in the completion queue without having to poll it for events later. .SS Reading completion events Similar to the submission queue (SQ), the completion queue (CQ) is a shared buffer between the kernel and user space. Whereas you placed submission queue entries on the tail of the SQ and the kernel read off the head, when it comes to the CQ, the kernel places completion queue events or CQEs on the tail of the CQ and you read off its head. .PP Submission is flexible (and thus a bit more complicated) since it needs to be able to encode different types of system calls that take various parameters. Completion, on the other hand is simpler since we're looking only for a return value back from the kernel. This is easily understood by looking at the completion queue event structure, struct .BR io_uring_cqe : .PP .in +4n .EX struct io_uring_cqe { __u64 user_data; /* sqe->data submission passed back */ __s32 res; /* result code for this event */ __u32 flags; }; .EE .in .PP Here, .I user_data is custom data that is passed unchanged from submission to completion. That is, from SQEs to CQEs. This field can be used to set context, uniquely identifying submissions that got completed. 
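.PP
For instance, an application might place a pointer to its own per-request bookkeeping structure in this field. This is an illustrative sketch;
.I struct request
is application-defined and not part of the io_uring API:
.PP
.in +4n
.EX
struct request *req = get_request();   /* application-defined */

sqe->user_data = (unsigned long) req;
/* ... submit; later, on the completion side ... */
req = (struct request *) cqe->user_data;
.EE
.in
.PP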
Given that I/O requests can complete in any order, this field can be used to correlate a submission with a completion. .I res is the result from the system call that was performed as part of the submission; its return value. The .I flags field carries request-specific information. As of the 6.0 kernel, the following flags are defined: .TP .B IORING_CQE_F_BUFFER If set, the upper 16 bits of the flags field carries the buffer ID that was chosen for this request. The request must have been issued with .B IOSQE_BUFFER_SELECT set, and used with a request type that supports buffer selection. Additionally, buffers must have been provided upfront either via the .B IORING_OP_PROVIDE_BUFFERS or the .B IORING_REGISTER_PBUF_RING methods. .TP .B IORING_CQE_F_MORE If set, the application should expect more completions from the request. This is used for requests that can generate multiple completions, such as multi-shot requests, receive, or accept. .TP .B IORING_CQE_F_SOCK_NONEMPTY If set, upon receiving the data from the socket in the current request, the socket still had data left on completion of this request. .TP .B IORING_CQE_F_NOTIF Set for notification CQEs, as seen with the zero-copy networking send and receive support. .PP The general sequence to read completion events off the completion queue is as follows: .PP .in +4n .EX unsigned head; head = *cqring->head; if (head != atomic_load_acquire(cqring->tail)) { struct io_uring_cqe *cqe; unsigned index; index = head & (cqring->mask); cqe = &cqring->cqes[index]; /* process completed CQE */ process_cqe(cqe); /* CQE consumption complete */ head++; } atomic_store_explicit(cqring->head, head, memory_order_release); .EE .in .PP It helps to be reminded that the kernel adds CQEs to the tail of the CQ, while you need to dequeue them off the head. To get the index of an entry at the head, the application must mask the current head index with the size mask of the ring. Once the CQE has been consumed or processed, the head needs to be updated to reflect the consumption of the CQE. Attention should be paid to the read and write barriers to ensure successful read and update of the head. .SS io_uring performance Because of the shared ring buffers between kernel and user space, .B io_uring can be a zero-copy system. Copying buffers to and from becomes necessary when system calls that transfer data between kernel and user space are involved. But since the bulk of the communication in .B io_uring is via buffers shared between the kernel and user space, this huge performance overhead is completely avoided. .PP While system calls may not seem like a significant overhead, in high performance applications, making a lot of them will begin to matter. While workarounds the operating system has in place to deal with Spectre and Meltdown are ideally best done away with, unfortunately, some of these workarounds are around the system call interface, making system calls not as cheap as before on affected hardware. While newer hardware should not need these workarounds, hardware with these vulnerabilities can be expected to be in the wild for a long time. While using synchronous programming interfaces or even when using asynchronous programming interfaces under Linux, there is at least one system call involved in the submission of each request. In .BR io_uring , on the other hand, you can batch several requests in one go, simply by queueing up multiple SQEs, each describing an I/O operation you want and make a single call to .BR io_uring_enter (2). 
This is possible due to .BR io_uring 's shared-buffer-based design. .PP While this batching in itself can avoid the overhead associated with potentially multiple and frequent system calls, you can reduce even this overhead further with Submission Queue Polling, by having the kernel poll and pick up your SQEs for processing as you add them to the submission queue. This avoids the .BR io_uring_enter (2) call you need to make to tell the kernel to pick SQEs up. For high-performance applications, this means even fewer system call overheads. .SH CONFORMING TO .B io_uring is Linux-specific. .SH EXAMPLES The following example uses .B io_uring to copy stdin to stdout. Using shell redirection, you should be able to copy files with this example. Because it uses a queue depth of only one, this example processes I/O requests one after the other. It is purposefully kept this way to aid understanding. In real-world scenarios, however, you'll want to have a larger queue depth to parallelize I/O request processing so as to gain the kind of performance benefits .B io_uring provides with its asynchronous processing of requests. .PP .EX
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <linux/fs.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <stdatomic.h>
#include <linux/io_uring.h>

#define QUEUE_DEPTH 1
#define BLOCK_SZ    1024

/* Macros for barriers needed by io_uring */
#define io_uring_smp_store_release(p, v)                    \\
    atomic_store_explicit((_Atomic typeof(*(p)) *)(p), (v), \\
                          memory_order_release)
#define io_uring_smp_load_acquire(p)                        \\
    atomic_load_explicit((_Atomic typeof(*(p)) *)(p),       \\
                         memory_order_acquire)

int ring_fd;
unsigned *sring_tail, *sring_mask, *sring_array,
         *cring_head, *cring_tail, *cring_mask;
struct io_uring_sqe *sqes;
struct io_uring_cqe *cqes;
char buff[BLOCK_SZ];
off_t offset;

/*
 * System call wrappers provided since glibc does not yet
 * provide wrappers for io_uring system calls.
 */
int io_uring_setup(unsigned entries, struct io_uring_params *p)
{
    return (int) syscall(__NR_io_uring_setup, entries, p);
}

int io_uring_enter(int ring_fd, unsigned int to_submit,
                   unsigned int min_complete, unsigned int flags)
{
    return (int) syscall(__NR_io_uring_enter, ring_fd, to_submit,
                         min_complete, flags, NULL, 0);
}

int app_setup_uring(void)
{
    struct io_uring_params p;
    void *sq_ptr, *cq_ptr;

    /* See io_uring_setup(2) for io_uring_params.flags you can set */
    memset(&p, 0, sizeof(p));
    ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
    if (ring_fd < 0) {
        perror("io_uring_setup");
        return 1;
    }

    /*
     * io_uring communication happens via 2 shared kernel-user space ring
     * buffers, which can be jointly mapped with a single mmap() call in
     * kernels >= 5.4.
     */
    int sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
    int cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);

    /* Rather than check for kernel version, the recommended way is to
     * check the features field of the io_uring_params structure, which is a
     * bitmask. If IORING_FEAT_SINGLE_MMAP is set, we can do away with the
     * second mmap() call to map in the completion ring separately.
     */
    if (p.features & IORING_FEAT_SINGLE_MMAP) {
        if (cring_sz > sring_sz)
            sring_sz = cring_sz;
        cring_sz = sring_sz;
    }

    /* Map in the submission and completion queue ring buffers.
     * Kernels < 5.4 only map in the submission queue, though.
     */
    sq_ptr = mmap(0, sring_sz, PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_POPULATE,
                  ring_fd, IORING_OFF_SQ_RING);
    if (sq_ptr == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    if (p.features & IORING_FEAT_SINGLE_MMAP) {
        cq_ptr = sq_ptr;
    } else {
        /* Map in the completion queue ring buffer in older kernels separately */
        cq_ptr = mmap(0, cring_sz, PROT_READ | PROT_WRITE,
                      MAP_SHARED | MAP_POPULATE,
                      ring_fd, IORING_OFF_CQ_RING);
        if (cq_ptr == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
    }
    /* Save useful fields for later easy reference */
    sring_tail = sq_ptr + p.sq_off.tail;
    sring_mask = sq_ptr + p.sq_off.ring_mask;
    sring_array = sq_ptr + p.sq_off.array;

    /* Map in the submission queue entries array */
    sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
                PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                ring_fd, IORING_OFF_SQES);
    if (sqes == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* Save useful fields for later easy reference */
    cring_head = cq_ptr + p.cq_off.head;
    cring_tail = cq_ptr + p.cq_off.tail;
    cring_mask = cq_ptr + p.cq_off.ring_mask;
    cqes = cq_ptr + p.cq_off.cqes;

    return 0;
}

/*
 * Read from completion queue.
 * In this function, we read completion events from the completion queue.
 * We dequeue the CQE, update the head, and return the result of the
 * operation.
 */
int read_from_cq()
{
    struct io_uring_cqe *cqe;
    unsigned head;

    /* Read barrier */
    head = io_uring_smp_load_acquire(cring_head);
    /*
     * Remember, this is a ring buffer. If head == tail, it means that
     * the buffer is empty.
     */
    if (head == *cring_tail)
        return -1;

    /* Get the entry */
    cqe = &cqes[head & (*cring_mask)];
    if (cqe->res < 0)
        fprintf(stderr, "Error: %s\\n", strerror(abs(cqe->res)));

    head++;

    /* Write barrier so that updates to the head are made visible */
    io_uring_smp_store_release(cring_head, head);

    return cqe->res;
}

/*
 * Submit a read or a write request to the submission queue.
 */
int submit_to_sq(int fd, int op)
{
    unsigned index, tail;

    /* Add our submission queue entry to the tail of the SQE ring buffer */
    tail = *sring_tail;
    index = tail & *sring_mask;
    struct io_uring_sqe *sqe = &sqes[index];

    /* Fill in the parameters required for the read or write operation */
    sqe->opcode = op;
    sqe->fd = fd;
    sqe->addr = (unsigned long) buff;
    if (op == IORING_OP_READ) {
        memset(buff, 0, sizeof(buff));
        sqe->len = BLOCK_SZ;
    } else {
        sqe->len = strlen(buff);
    }
    sqe->off = offset;
    sring_array[index] = index;
    tail++;

    /* Update the tail */
    io_uring_smp_store_release(sring_tail, tail);

    /*
     * Tell the kernel we have submitted events with the io_uring_enter()
     * system call. We also pass in the IORING_ENTER_GETEVENTS flag, which
     * causes the io_uring_enter() call to wait until min_complete
     * (the 3rd param) events complete.
     */
    int ret = io_uring_enter(ring_fd, 1, 1, IORING_ENTER_GETEVENTS);
    if (ret < 0) {
        perror("io_uring_enter");
        return -1;
    }

    return ret;
}

int main(int argc, char *argv[])
{
    int res;

    /* Setup io_uring for use */
    if (app_setup_uring()) {
        fprintf(stderr, "Unable to setup uring!\\n");
        return 1;
    }

    /*
     * A while loop that reads from stdin and writes to stdout.
     * Breaks on EOF.
     */
    while (1) {
        /* Initiate read from stdin and wait for it to complete */
        submit_to_sq(STDIN_FILENO, IORING_OP_READ);
        /* Read completion queue entry */
        res = read_from_cq();
        if (res > 0) {
            /* Read successful. Write to stdout.
             */
            submit_to_sq(STDOUT_FILENO, IORING_OP_WRITE);
            read_from_cq();
        } else if (res == 0) {
            /* reached EOF */
            break;
        } else if (res < 0) {
            /* Error reading file */
            fprintf(stderr, "Error: %s\\n", strerror(abs(res)));
            break;
        }
        offset += res;
    }

    return 0;
}
.EE .SH SEE ALSO .BR io_uring_enter (2) .BR io_uring_register (2) .BR io_uring_setup (2) liburing-2.6/man/io_uring_buf_ring_add.3000066400000000000000000000037741461424365000203470ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_buf_ring_add 3 "May 18, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_buf_ring_add \- add buffers to a shared buffer ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_buf_ring_add(struct io_uring_buf_ring *" br ", .BI " void *" addr ", .BI " unsigned int " len ", .BI " unsigned short " bid ", .BI " int " mask ", .BI " int " buf_offset ");" .fi .SH DESCRIPTION .PP The .BR io_uring_buf_ring_add (3) function adds a new buffer to the shared buffer ring .IR br . The buffer address is indicated by .I addr and is .I len bytes in length. .I bid is the buffer ID, which will be returned in the CQE. .I mask is the size mask of the ring, available from .BR io_uring_buf_ring_mask (3). .I buf_offset is the offset to insert at from the current tail. If just one buffer is provided before the ring tail is committed with .BR io_uring_buf_ring_advance (3) or .BR io_uring_buf_ring_cq_advance (3), then .I buf_offset should be 0. If buffers are provided in a loop before being committed, the .I buf_offset must be incremented by one for each buffer added. .SH RETURN VALUE None .SH NOTES liburing (or the kernel, for that matter) doesn't care about what buffer ID maps to what buffer, and in fact when recycling buffers after use, the application is free to add a different buffer into the same buffer ID location. All that matters is that the application knows what a given buffer ID corresponds to in terms of virtual memory at any given time. There's no liburing or kernel assumption that these mappings are persistent over time; they can very well be different every time a given buffer ID is added to the provided buffer ring. .SH SEE ALSO .BR io_uring_register_buf_ring (3), .BR io_uring_buf_ring_mask (3), .BR io_uring_buf_ring_advance (3), .BR io_uring_buf_ring_cq_advance (3) liburing-2.6/man/io_uring_buf_ring_advance.3000066400000000000000000000014721461424365000212130ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_buf_ring_advance 3 "May 18, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_buf_ring_advance \- advance index of provided buffer in buffer ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_buf_ring_advance(struct io_uring_buf_ring *" br ", .BI " int " count ");" .fi .SH DESCRIPTION .PP The .BR io_uring_buf_ring_advance (3) function commits .I count previously added buffers to the shared buffer ring .IR br , making them visible to the kernel and hence consumable. This passes ownership of the buffer to the ring.
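.PP
As an illustrative sketch (not part of the API; the buffer array
.IR bufs ,
.I NR_BUFS
and
.I BUF_SIZE
are assumptions), buffers are typically added in a loop and then committed with a single advance:
.PP
.in +4n
.EX
int mask = io_uring_buf_ring_mask(NR_BUFS);
int i;

for (i = 0; i < NR_BUFS; i++)
    io_uring_buf_ring_add(br, bufs[i], BUF_SIZE, i, mask, i);

/* make all NR_BUFS buffers visible to the kernel in one operation */
io_uring_buf_ring_advance(br, NR_BUFS);
.EE
.in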
.SH RETURN VALUE None .SH SEE ALSO .BR io_uring_register_buf_ring (3), .BR io_uring_buf_ring_add (3), .BR io_uring_buf_ring_cq_advance (3) liburing-2.6/man/io_uring_buf_ring_available.3000066400000000000000000000031601461424365000215260ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_buf_ring_available 3 "Jan 11, 2024" "liburing-2.6" "liburing Manual" .SH NAME io_uring_buf_ring_available \- return number of unconsumed provided ring buffer entries .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_buf_ring_available(struct io_uring *" ring ", .BI " struct io_uring_buf_ring *" br ", .BI " unsigned short " bgid ");" .fi .SH DESCRIPTION .PP The .BR io_uring_buf_ring_available (3) helper returns the number of unconsumed (by the kernel) entries in the .IR br provided buffer group belonging to the io_uring .IR ring and identified by the buffer group ID .IR bgid. Since the head of the provided buffer ring is only visible to the kernel, it's impossible to otherwise know how many unconsumed entries exist in the given provided buffer ring. This function queries the kernel to return that number. .SH NOTES The returned number of entries reflects the number of unconsumed entries at the time that it was queried. If inflight IO exists that may consume provided buffers from this buffer group, then the returned value is inherently racy. .SH RETURN VALUE Returns the number of unconsumed entries on success, which may be 0. In case of error, may return .BR -ENOENT if the specified buffer group doesn't exist, or .BR -EINVAL if the buffer group isn't of the correct type, or if the kernel doesn't support this feature. .SH SEE ALSO .BR io_uring_register_buf_ring (3), .BR io_uring_buf_ring_add (3), .BR io_uring_buf_ring_cq_advance (3) liburing-2.6/man/io_uring_buf_ring_cq_advance.3000066400000000000000000000033201461424365000216700ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_buf_ring_cq_advance 3 "May 18, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_buf_ring_cq_advance \- advance index of provided buffer and CQ ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_buf_ring_cq_advance(struct io_uring *" ring ", .BI " struct io_uring_buf_ring *" br ", .BI " int " count ");" .PP .BI "void __io_uring_buf_ring_cq_advance(struct io_uring *" ring ", .BI " struct io_uring_buf_ring *" br ", .BI " int " cq_count ", .BI " int " buf_count ");" .fi .SH DESCRIPTION .PP The .BR io_uring_buf_ring_cq_advance (3) function commits .I count previously added buffers to the shared buffer ring .IR br , making them visible to the kernel and hence consumable. This passes ownership of the buffer to the ring. At the same time, it advances the CQ ring of .I ring by .I count entries. This effectively bundles an .BR io_uring_buf_ring_advance (3) call and an .BR io_uring_cq_advance (3) call into one operation. Since updating either ring index entails a store memory barrier, doing both at once is more efficient. The .BR __io_uring_buf_ring_cq_advance (3) function performs the same operation, except it splits the counts into two separate values. It advances the CQ ring by .I cq_count entries, and the buffer ring by .I buf_count entries, rather than incrementing both by the same value.
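.PP
As a sketch of typical usage (the buffer bookkeeping and the
.I handle_cqe
helper are illustrative assumptions, not part of the API), an application that recycles one provided buffer per completion can advance both rings once per batch:
.PP
.in +4n
.EX
struct io_uring_cqe *cqe;
unsigned head;
int count = 0;

io_uring_for_each_cqe(ring, head, cqe) {
    int bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;

    handle_cqe(cqe); /* application-defined processing */
    io_uring_buf_ring_add(br, bufs[bid], BUF_SIZE, bid,
                          io_uring_buf_ring_mask(NR_BUFS), count++);
}
/* recycle the buffers and mark the CQEs as seen in one call */
io_uring_buf_ring_cq_advance(ring, br, count);
.EE
.in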
.SH RETURN VALUE None .SH SEE ALSO .BR io_uring_register_buf_ring (3), .BR io_uring_buf_ring_add (3), .BR io_uring_buf_ring_advance (3) liburing-2.6/man/io_uring_buf_ring_init.3000066400000000000000000000017031461424365000205520ustar00rootroot00000000000000.\" Copyright (C) 2022 Dylan Yudaken .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_buf_ring_init 3 "June 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_buf_ring_init \- Initialise a buffer ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_buf_ring_init(struct io_uring_buf_ring *" br ");" .fi .SH DESCRIPTION .PP .BR io_uring_buf_ring_init (3) initialises .IR br so that it is ready to be used. It may be called after .BR io_uring_register_buf_ring (3) but must be called before the buffer ring is used in any other way. .SH RETURN VALUE None .SH NOTES Unless manual setup is needed, it's recommended to use .BR io_uring_setup_buf_ring (3) as it provides a simpler way to set up a provided buffer ring. .SH SEE ALSO .BR io_uring_register_buf_ring (3), .BR io_uring_setup_buf_ring (3), .BR io_uring_buf_ring_add (3), .BR io_uring_buf_ring_advance (3), .BR io_uring_buf_ring_cq_advance (3) liburing-2.6/man/io_uring_buf_ring_mask.3000066400000000000000000000012571461424365000205460ustar00rootroot00000000000000.\" Copyright (C) 2022 Dylan Yudaken .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_buf_ring_mask 3 "June 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_buf_ring_mask \- Calculate buffer ring mask size .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_buf_ring_mask(__u32 " ring_entries ");" .fi .SH DESCRIPTION .PP .BR io_uring_buf_ring_mask (3) calculates the appropriate size mask for a buffer ring. .IR ring_entries is the number of ring entries, as specified in .BR io_uring_register_buf_ring (3). .SH RETURN VALUE Size mask for the buffer ring. .SH SEE ALSO .BR io_uring_register_buf_ring (3), .BR io_uring_buf_ring_add (3) liburing-2.6/man/io_uring_check_version.3000066400000000000000000000027631461424365000205650ustar00rootroot00000000000000.\" Copyright (C) 2022 Christian Hergert .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_check_version 3 "December 1, 2022" "liburing-2.4" "liburing Manual" .SH NAME io_uring_check_version \- functions and macros to check the liburing version .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "bool io_uring_check_version(int " major ", int " minor ");" .BI "IO_URING_CHECK_VERSION(" major ", " minor ");" .PP .BI "int io_uring_major_version(void);" .BI "IO_URING_VERSION_MAJOR;" .PP .BI "int io_uring_minor_version(void);" .BI "IO_URING_VERSION_MINOR;" .fi .SH DESCRIPTION .PP The .BR io_uring_check_version (3) function returns .I false if the liburing library loaded by the dynamic linker is greater than or equal to the .I major and .I minor numbers provided. .PP The .BR IO_URING_CHECK_VERSION (3) macro returns .I 0 if the liburing library being compiled against is greater than or equal to the .I major and .I minor numbers provided. .PP The .BR io_uring_major_version (3) function returns the .I major version number of the liburing library loaded by the dynamic linker. .PP The .BR IO_URING_VERSION_MAJOR (3) macro returns the .I major version number of the liburing library being compiled against. .PP The .BR io_uring_minor_version (3) function returns the .I minor version number of the liburing library loaded by the dynamic linker.
.PP The .BR IO_URING_VERSION_MINOR (3) macro returns the .I minor version number of the liburing library being compiled against. liburing-2.6/man/io_uring_close_ring_fd.3000066400000000000000000000026451461424365000205370ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" Copyright (C) 2022 Josh Triplett .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_close_ring_fd 3 "September 25, 2022" "liburing-2.4" "liburing Manual" .SH NAME io_uring_close_ring_fd \- close a ring file descriptor and use it only via registered index .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_close_ring_fd(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP .BR io_uring_close_ring_fd (3) closes the ring file descriptor, which must have been previously registered. The file will remain open, but accessible only via the registered index, not via any file descriptor. Subsequent liburing calls will continue to work, using the registered ring fd. The kernel must support .BR IORING_FEAT_REG_REG_RING . Libraries that must avoid disrupting their users' uses of file descriptors, and must continue working even in the face of .BR close_range (2) and similar, can use .BR io_uring_close_ring_fd (3) to work with liburing without having any open file descriptor. .SH NOTES Each thread that wants to make use of io_uring must register the fd. A library that may get called from arbitrary threads may need to detect when it gets called on a previously unseen thread and create and register a ring for that thread. .SH RETURN VALUE Returns 1 on success, or .BR -errno on error. .SH SEE ALSO .BR io_uring_register_ring_fd (3) liburing-2.6/man/io_uring_cq_advance.3000066400000000000000000000024051461424365000200200ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_cq_advance 3 "January 25, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_cq_advance \- mark one or more io_uring completion events as consumed .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_cq_advance(struct io_uring *" ring "," .BI " unsigned " nr ");" .fi .SH DESCRIPTION .PP The .BR io_uring_cq_advance (3) function marks .I nr IO completions belonging to the .I ring param as consumed. After the caller has submitted a request with .BR io_uring_submit (3), the application can retrieve the completion with .BR io_uring_wait_cqe (3), .BR io_uring_peek_cqe (3), or any of the other CQE retrieval helpers, and mark it as consumed with .BR io_uring_cqe_seen (3). The function .BR io_uring_cqe_seen (3) calls the function .BR io_uring_cq_advance (3). Completions must be marked as seen so their slot can get reused. Failure to do so will result in the same completion being returned on the next invocation.
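.PP
For example, a minimal sketch (error handling omitted;
.I handle_cqe
is an assumed application-defined function) that consumes a batch of completions with a single advance:
.PP
.in +4n
.EX
struct io_uring_cqe *cqes[16];
unsigned i, nr;

nr = io_uring_peek_batch_cqe(ring, cqes, 16);
for (i = 0; i < nr; i++)
    handle_cqe(cqes[i]);

/* mark all nr completions as consumed in one operation */
io_uring_cq_advance(ring, nr);
.EE
.in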
.SH RETURN VALUE None .SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_wait_cqe (3), .BR io_uring_peek_cqe (3), .BR io_uring_wait_cqes (3), .BR io_uring_wait_cqe_timeout (3), .BR io_uring_cqe_seen (3) liburing-2.6/man/io_uring_cq_has_overflow.3000066400000000000000000000020731461424365000211160ustar00rootroot00000000000000.\" Copyright (C) 2022 Dylan Yudaken .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_cq_has_overflow 3 "September 5, 2022" "liburing-2.3" "liburing Manual" .SH NAME io_uring_cq_has_overflow \- returns if there are overflow entries waiting to move to the CQ ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "bool io_uring_cq_has_overflow(const struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_cq_has_overflow (3) function informs the application if CQ entries have overflowed and are waiting to be flushed to the CQ ring, for example by using .BR io_uring_get_events (3). .SH NOTES Using this function is only valid if the ring has .B IORING_FEAT_NODROP set, as it's checking for a flag set by kernels supporting that feature. For really old kernels that don't support this feature, if CQE overflow is experienced the CQEs are lost. If that happens, the CQ ring overflow offset will get incremented. .SH RETURN VALUE True if there are CQ entries waiting to be flushed to the CQ ring. .SH SEE ALSO .BR io_uring_get_events (3) liburing-2.6/man/io_uring_cq_ready.3000066400000000000000000000012461461424365000175250ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_cq_ready 3 "January 25, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_cq_ready \- returns number of unconsumed ready entries in the CQ ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "unsigned io_uring_cq_ready(const struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_cq_ready (3) function returns the number of ready, unconsumed entries belonging to the .I ring param. .SH RETURN VALUE Returns the number of unconsumed ready entries in the CQ ring. .SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_wait_cqe (3) liburing-2.6/man/io_uring_cqe_get_data.3000066400000000000000000000024661461424365000203410ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_cqe_get_data 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_cqe_get_data \- get user data for completion event .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void *io_uring_cqe_get_data(struct io_uring_cqe *" cqe ");" .PP .BI "__u64 io_uring_cqe_get_data64(struct io_uring_cqe *" cqe ");" .fi .SH DESCRIPTION .PP The .BR io_uring_cqe_get_data (3) function returns the user_data with the completion queue entry .IR cqe as a data pointer. The .BR io_uring_cqe_get_data64 (3) function returns the user_data with the completion queue entry .IR cqe as a 64-bit data value. After the caller has received a completion queue entry (CQE) with .BR io_uring_wait_cqe (3), the application can call the .BR io_uring_cqe_get_data (3) or .BR io_uring_cqe_get_data64 (3) function to retrieve the .I user_data value. This requires that .I user_data has been set earlier with the function .BR io_uring_sqe_set_data (3) or .BR io_uring_sqe_set_data64 (3). .SH RETURN VALUE If the .I user_data value has been set before submitting the request, it will be returned. Otherwise, the return value is undefined.
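.SH EXAMPLE
A minimal sketch of the round trip (error handling omitted;
.I ring
is an initialized ring and
.I my_ctx
an assumed application pointer; the request type is immaterial):
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;

sqe = io_uring_get_sqe(&ring);
io_uring_prep_nop(sqe);
io_uring_sqe_set_data(sqe, my_ctx);
io_uring_submit(&ring);

io_uring_wait_cqe(&ring, &cqe);
/* returns the same pointer that was set above */
void *ctx = io_uring_cqe_get_data(cqe);
io_uring_cqe_seen(&ring, cqe);
.EE
.in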
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_sqe_set_data (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_cqe_get_data64.3000077700000000000000000000000001461424365000250572io_uring_cqe_get_data.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_cqe_seen.3000066400000000000000000000020641461424365000175170ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_cqe_seen 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_cqe_seen \- mark io_uring completion event as consumed .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_cqe_seen(struct io_uring *" ring "," .BI " struct io_uring_cqe *" cqe ");" .fi .SH DESCRIPTION .PP The .BR io_uring_cqe_seen (3) function marks the IO completion .I cqe belonging to the .I ring param as consumed. After the caller has submitted a request with .BR io_uring_submit (3), the application can retrieve the completion with .BR io_uring_wait_cqe (3), .BR io_uring_peek_cqe (3), or any of the other CQE retrieval helpers, and mark it as consumed with .BR io_uring_cqe_seen (3). Completions must be marked as completed so their slot can get reused. .SH RETURN VALUE None .SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_peek_cqe (3), .BR io_uring_wait_cqe (3), .BR io_uring_wait_cqes (3), .BR io_uring_wait_cqe_timeout (3) liburing-2.6/man/io_uring_enter.2000066400000000000000000001375551461424365000170660ustar00rootroot00000000000000.\" Copyright (C) 2019 Jens Axboe .\" Copyright (C) 2019 Red Hat, Inc. .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_enter 2 2019-01-22 "Linux" "Linux Programmer's Manual" .SH NAME io_uring_enter \- initiate and/or complete asynchronous I/O .SH SYNOPSIS .nf .BR "#include <linux/io_uring.h>" .PP .BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit , .BI " unsigned int " min_complete ", unsigned int " flags , .BI " sigset_t *" sig ); .PP .BI "int io_uring_enter2(unsigned int " fd ", unsigned int " to_submit , .BI " unsigned int " min_complete ", unsigned int " flags , .BI " sigset_t *" sig ", size_t " sz ); .fi .PP .SH DESCRIPTION .PP .BR io_uring_enter (2) is used to initiate and complete I/O using the shared submission and completion queues set up by a call to .BR io_uring_setup (2). A single call can both submit new I/O and wait for completions of I/O initiated by this call or previous calls to .BR io_uring_enter (2). .I fd is the file descriptor returned by .BR io_uring_setup (2). .I to_submit specifies the number of I/Os to submit from the submission queue. .I flags is a bitmask of the following values: .TP .B IORING_ENTER_GETEVENTS If this flag is set, then the system call will wait for the specified number of events in .I min_complete before returning. This flag can be set along with .I to_submit to both submit and complete events in a single system call. .TP .B IORING_ENTER_SQ_WAKEUP If the ring has been created with .B IORING_SETUP_SQPOLL, then this flag asks the kernel to wake up the SQ kernel thread to submit IO. .TP .B IORING_ENTER_SQ_WAIT If the ring has been created with .B IORING_SETUP_SQPOLL, then the application has no real insight into when the SQ kernel thread has consumed entries from the SQ ring. This can lead to a situation where the application can no longer get a free SQE entry to submit, without knowing when one becomes available as the SQ kernel thread consumes them.
If the system call is used with this flag set, then it will wait until at least one entry is free in the SQ ring. .TP .B IORING_ENTER_EXT_ARG Since kernel 5.11, the system call's arguments have been modified to look like the following: .nf .BI "int io_uring_enter2(unsigned int " fd ", unsigned int " to_submit , .BI " unsigned int " min_complete ", unsigned int " flags , .BI " const void *" arg ", size_t " argsz ); .fi which behaves just like the original definition by default. However, if .B IORING_ENTER_EXT_ARG is set, then instead of a .I sigset_t being passed in, a pointer to a .I struct io_uring_getevents_arg is used instead and .I argsz must be set to the size of this structure. The definition is as follows: .nf .BI "struct io_uring_getevents_arg { .BI " __u64 sigmask; .BI " __u32 sigmask_sz; .BI " __u32 pad; .BI " __u64 ts; .BI "}; .fi which allows passing in both a signal mask as well as a pointer to a .I struct __kernel_timespec timeout value. If .I ts is set to a valid pointer, then this time value indicates the timeout for waiting on events. If an application is waiting on events and wishes to stop waiting after a specified amount of time, then this can be accomplished directly in version 5.11 and newer by using this feature. .TP .B IORING_ENTER_REGISTERED_RING If the ring file descriptor has been registered through use of .B IORING_REGISTER_RING_FDS, then setting this flag will tell the kernel that the .I ring_fd passed in is the registered ring offset rather than a normal file descriptor. .PP .PP If the io_uring instance was configured for polling, by specifying .B IORING_SETUP_IOPOLL in the call to .BR io_uring_setup (2), then min_complete has a slightly different meaning. Passing a value of 0 instructs the kernel to return any events which are already complete, without blocking. If .I min_complete is a non-zero value, the kernel will still return immediately if any completion events are available. If no event completions are available, then the call will poll either until one or more completions become available, or until the process has exceeded its scheduler time slice. Note that, for interrupt driven I/O (where .B IORING_SETUP_IOPOLL was not specified in the call to .BR io_uring_setup (2)), an application may check the completion queue for event completions without entering the kernel at all. .PP When the system call returns that a certain number of SQEs have been consumed and submitted, it's safe to reuse SQE entries in the ring. This is true even if the actual IO submission had to be punted to async context, which means that the SQE may in fact not have been submitted yet. If the kernel requires later use of a particular SQE entry, it will have made a private copy of it. .I sig is a pointer to a signal mask (see .BR sigprocmask (2)); if .I sig is not NULL, .BR io_uring_enter (2) first replaces the current signal mask by the one pointed to by .IR sig , then waits for events to become available in the completion queue, and then restores the original signal mask. The following .BR io_uring_enter (2) call: .PP .in +4n .EX ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, &sig); .EE .in .PP is equivalent to .I atomically executing the following calls: .PP .in +4n .EX pthread_sigmask(SIG_SETMASK, &sig, &orig); ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS, NULL); pthread_sigmask(SIG_SETMASK, &orig, NULL); .EE .in .PP See the description of .BR pselect (2) for an explanation of why the .I sig parameter is necessary.
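.PP
As a sketch of how
.B IORING_ENTER_EXT_ARG
may be used (a raw syscall invocation is assumed here, as in the example program in
.BR io_uring (7);
the field values are illustrative), waiting for one completion with a one second timeout:
.PP
.in +4n
.EX
struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
struct io_uring_getevents_arg arg = {
    .sigmask    = 0,    /* no signal mask change */
    .sigmask_sz = 0,
    .ts         = (unsigned long) &ts,
};

ret = syscall(__NR_io_uring_enter, fd, 0, 1,
              IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
              &arg, sizeof(arg));
.EE
.in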
Submission queue entries are represented using the following data structure: .PP .in +4n .EX
/*
 * IO submission data structure (Submission Queue Entry)
 */
struct io_uring_sqe {
    __u8    opcode;     /* type of operation for this sqe */
    __u8    flags;      /* IOSQE_ flags */
    __u16   ioprio;     /* ioprio for the request */
    __s32   fd;         /* file descriptor to do IO on */
    union {
        __u64   off;    /* offset into file */
        __u64   addr2;
    };
    union {
        __u64   addr;   /* pointer to buffer or iovecs */
        __u64   splice_off_in;
    };
    __u32   len;        /* buffer size or number of iovecs */
    union {
        __kernel_rwf_t  rw_flags;
        __u32           fsync_flags;
        __u16           poll_events;    /* compatibility */
        __u32           poll32_events;  /* word-reversed for BE */
        __u32           sync_range_flags;
        __u32           msg_flags;
        __u32           timeout_flags;
        __u32           accept_flags;
        __u32           cancel_flags;
        __u32           open_flags;
        __u32           statx_flags;
        __u32           fadvise_advice;
        __u32           splice_flags;
        __u32           rename_flags;
        __u32           unlink_flags;
        __u32           hardlink_flags;
    };
    __u64   user_data;  /* data to be passed back at completion time */
    union {
        struct {
            union {
                /* index into fixed buffers, if used */
                __u16   buf_index;
                /* for grouped buffer selection */
                __u16   buf_group;
            };
            /* personality to use, if used */
            __u16   personality;
            union {
                __s32   splice_fd_in;
                __u32   file_index;
            };
        };
        __u64   __pad2[3];
    };
};
.EE .in .PP The .I opcode describes the operation to be performed. It can be one of: .TP .B IORING_OP_NOP Do not perform any I/O. This is useful for testing the performance of the io_uring implementation itself. .TP .B IORING_OP_READV .TP .B IORING_OP_WRITEV Vectored read and write operations, similar to .BR preadv2 (2) and .BR pwritev2 (2). If the file is not seekable, .I off must be set to zero or -1. .TP .B IORING_OP_READ_FIXED .TP .B IORING_OP_WRITE_FIXED Read from or write to pre-mapped buffers. See .BR io_uring_register (2) for details on how to set up a context for fixed reads and writes. .TP .B IORING_OP_FSYNC File sync. See also .BR fsync (2). Note that, while I/O is initiated in the order in which it appears in the submission queue, completions are unordered. For example, an application which places a write I/O followed by an fsync in the submission queue cannot expect the fsync to apply to the write. The two operations execute in parallel, so the fsync may complete before the write is issued to the storage. The same is also true for previously issued writes that have not completed prior to the fsync. .TP .B IORING_OP_POLL_ADD Poll the .I fd specified in the submission queue entry for the events specified in the .I poll_events field. Unlike poll or epoll without .BR EPOLLONESHOT , by default this interface always works in one shot mode. That is, once the poll operation is completed, it will have to be resubmitted. If .B IORING_POLL_ADD_MULTI is set in the SQE .I len field, then the poll will work in multi shot mode instead. That means it'll repeatedly trigger when the requested event becomes true, and hence multiple CQEs can be generated from this single SQE. The CQE .I flags field will have .B IORING_CQE_F_MORE set on completion if the application should expect further CQE entries from the original request. If this flag isn't set on completion, then the poll request has been terminated and no further events will be generated. This mode is available since 5.13. This command works like an async .BR poll(2) and the completion event result is the returned mask of events. .TP .B IORING_OP_POLL_REMOVE Remove or update an existing poll request. If found, the .I res field of the .I "struct io_uring_cqe" will contain 0.
If not found, .I res will contain .B -ENOENT, or .B -EALREADY if the poll request was in the process of completing already. If .B IORING_POLL_UPDATE_EVENTS is set in the SQE .I len field, then the request will update an existing poll request with the mask of events passed in with this request. The lookup is based on the .I user_data field of the original SQE submitted, and this value is passed in the .I addr field of the SQE. If .B IORING_POLL_UPDATE_USER_DATA is set in the SQE .I len field, then the request will update the .I user_data of an existing poll request based on the value passed in the .I off field. Updating an existing poll is available since 5.13. .TP .B IORING_OP_EPOLL_CTL Add, remove or modify entries in the interest list of .BR epoll (7). See .BR epoll_ctl (2) for details of the system call. .I fd holds the file descriptor that represents the epoll instance, .I addr holds the file descriptor to add, remove or modify, .I len holds the operation (EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD) to perform, and .I off holds a pointer to the .I epoll_event structure. Available since 5.6. .TP .B IORING_OP_SYNC_FILE_RANGE Issue the equivalent of a .BR sync_file_range (2) on the file descriptor. The .I fd field is the file descriptor to sync, the .I off field holds the offset in bytes, the .I len field holds the length in bytes, and the .I sync_range_flags field holds the flags for the command. See also .BR sync_file_range (2) for the general description of the related system call. Available since 5.2. .TP .B IORING_OP_SENDMSG Issue the equivalent of a .BR sendmsg(2) system call. .I fd must be set to the socket file descriptor, .I addr must contain a pointer to the msghdr structure, and .I msg_flags holds the flags associated with the system call. See also .BR sendmsg (2) for the general description of the related system call. Available since 5.3. This command also supports the following modifiers in .I ioprio: .PP .in +12 .B IORING_RECVSEND_POLL_FIRST If set, io_uring will assume the socket is currently full and attempting to send data will be unsuccessful. For this case, io_uring will arm internal poll and trigger a send of the data when there is enough space available. This initial send attempt can be wasteful for the case where the socket is expected to be full, setting this flag will bypass the initial send attempt and go straight to arming poll. If poll does indicate that data can be sent, the operation will proceed. .EE .in .PP .TP .B IORING_OP_RECVMSG Works just like IORING_OP_SENDMSG, except for .BR recvmsg(2) instead. See the description of IORING_OP_SENDMSG. Available since 5.3. This command also supports the following modifiers in .I ioprio: .PP .in +12 .B IORING_RECVSEND_POLL_FIRST If set, io_uring will assume the socket is currently empty and attempting to receive data will be unsuccessful. For this case, io_uring will arm internal poll and trigger a receive of the data when the socket has data to be read. This initial receive attempt can be wasteful for the case where the socket is expected to be empty, setting this flag will bypass the initial receive attempt and go straight to arming poll. If poll does indicate that data is ready to be received, the operation will proceed. .EE .in .PP .TP .B IORING_OP_SEND Issue the equivalent of a .BR send(2) system call. .I fd must be set to the socket file descriptor, .I addr must contain a pointer to the buffer, .I len denotes the length of the buffer to send, and .I msg_flags holds the flags associated with the system call.
See also .BR send(2) for the general description of the related system call. Available since 5.6. This command also supports the following modifiers in .I ioprio: .PP .in +12 .B IORING_RECVSEND_POLL_FIRST If set, io_uring will assume the socket is currently full and attempting to send data will be unsuccessful. For this case, io_uring will arm internal poll and trigger a send of the data when there is enough space available. This initial send attempt can be wasteful for the case where the socket is expected to be full, setting this flag will bypass the initial send attempt and go straight to arming poll. If poll does indicate that data can be sent, the operation will proceed. .EE .in .PP .TP .B IORING_OP_RECV Works just like IORING_OP_SEND, except for .BR recv(2) instead. See the description of IORING_OP_SEND. Available since 5.6. This command also supports the following modifiers in .I ioprio: .PP .in +12 .B IORING_RECVSEND_POLL_FIRST If set, io_uring will assume the socket is currently empty and attempting to receive data will be unsuccessful. For this case, io_uring will arm internal poll and trigger a receive of the data when the socket has data to be read. This initial receive attempt can be wasteful for the case where the socket is expected to be empty, setting this flag will bypass the initial receive attempt and go straight to arming poll. If poll does indicate that data is ready to be received, the operation will proceed. .EE .in .PP .TP .B IORING_OP_TIMEOUT This command will register a timeout operation. The .I addr field must contain a pointer to a struct timespec64 structure, .I len must contain 1 to signify one timespec64 structure, .I timeout_flags may contain IORING_TIMEOUT_ABS for an absolute timeout value, or 0 for a relative timeout. .I off may contain a completion event count. A timeout will trigger a wakeup event on the completion ring for anyone waiting for events. A timeout condition is met when either the specified timeout expires, or the specified number of events have completed. Either condition will trigger the event. If set to 0, completed events are not counted, which effectively acts like a timer. io_uring timeouts use the .B CLOCK_MONOTONIC clock source. The request will complete with .I -ETIME if the timeout got completed through expiration of the timer, or .I 0 if the timeout got completed through requests completing on their own. If the timeout was canceled before it expired, the request will complete with .I -ECANCELED. Available since 5.4. Since 5.15, this command also supports the following modifiers in .I timeout_flags: .PP .in +12 .B IORING_TIMEOUT_BOOTTIME If set, then the clocksource used is .I CLOCK_BOOTTIME instead of .I CLOCK_MONOTONIC. This clocksource differs in that it includes time elapsed if the system was suspended while having a timeout request in-flight. .B IORING_TIMEOUT_REALTIME If set, then the clocksource used is .I CLOCK_REALTIME instead of .I CLOCK_MONOTONIC. .EE .in .PP .TP .B IORING_OP_TIMEOUT_REMOVE If .I timeout_flags are zero, then it attempts to remove an existing timeout operation. .I addr must contain the .I user_data field of the previously issued timeout operation.
If the specified timeout request is found and canceled successfully, this request will terminate with a result value of .I 0. If the timeout request was found but expiration was already in progress, this request will terminate with a result value of .I -EBUSY. If the timeout request wasn't found, the request will terminate with a result value of .I -ENOENT. Available since 5.5. If .I timeout_flags contain .I IORING_TIMEOUT_UPDATE, instead of removing an existing operation, it updates it. .I addr and return values are the same as before. .I addr2 field must contain a pointer to a struct timespec64 structure. .I timeout_flags may also contain IORING_TIMEOUT_ABS, in which case the value given is an absolute one, not a relative one. Available since 5.11. .TP .B IORING_OP_ACCEPT Issue the equivalent of an .BR accept4(2) system call. .I fd must be set to the socket file descriptor, .I addr must contain the pointer to the sockaddr structure, and .I addr2 must contain a pointer to the socklen_t addrlen field. Flags can be passed using the .I accept_flags field. See also .BR accept4(2) for the general description of the related system call. Available since 5.5. If the .I file_index field is set to a positive number, the file won't be installed into the normal file table as usual but will be placed into the fixed file table at index .I file_index - 1. In this case, instead of returning a file descriptor, the result will contain either 0 on success or an error. If the index points to a valid empty slot, the installation is guaranteed to not fail. If there is already a file in the slot, it will be replaced, similar to .B IORING_OP_FILES_UPDATE. Please note that only io_uring has access to such files and no other syscall can use them. See .B IOSQE_FIXED_FILE and .B IORING_REGISTER_FILES. Available since 5.5. .TP .B IORING_OP_ASYNC_CANCEL Attempt to cancel an already issued request. .I addr must contain the .I user_data field of the request that should be canceled. The cancelation request will complete with one of the following result codes. If found, the .I res field of the cqe will contain 0. If not found, .I res will contain -ENOENT. If found and cancelation was attempted, the .I res field will contain -EALREADY. In this case, the request may or may not terminate. In general, requests that are interruptible (like socket IO) will get canceled, while disk IO requests cannot be canceled if already started. Available since 5.5. .TP .B IORING_OP_LINK_TIMEOUT This request must be linked with another request through .I IOSQE_IO_LINK which is described below. Unlike .I IORING_OP_TIMEOUT, .I IORING_OP_LINK_TIMEOUT acts on the linked request, not the completion queue. The format of the command is otherwise like .I IORING_OP_TIMEOUT, except there's no completion event count as it's tied to a specific request. If used, the timeout specified in the command will cancel the linked command, unless the linked command completes before the timeout. The timeout will complete with .I -ETIME if the timer expired and cancelation of the linked request was attempted, or .I -ECANCELED if the timer got canceled because of completion of the linked request. Like .B IORING_OP_TIMEOUT, the clock source used is .B CLOCK_MONOTONIC. Available since 5.5. .TP .B IORING_OP_CONNECT Issue the equivalent of a .BR connect(2) system call. .I fd must be set to the socket file descriptor, .I addr must contain the const pointer to the sockaddr structure, and .I off must contain the socklen_t addrlen field.
See also .BR connect(2) for the general description of the related system call. Available since 5.5. .TP .B IORING_OP_FALLOCATE Issue the equivalent of a .BR fallocate(2) system call. .I fd must be set to the file descriptor, .I len must contain the mode associated with the operation, .I off must contain the offset on which to operate, and .I addr must contain the length. See also .BR fallocate(2) for the general description of the related system call. Available since 5.6. .TP .B IORING_OP_FADVISE Issue the equivalent of a .BR posix_fadvise(2) system call. .I fd must be set to the file descriptor, .I off must contain the offset on which to operate, .I len must contain the length, and .I fadvise_advice must contain the advice associated with the operation. See also .BR posix_fadvise(2) for the general description of the related system call. Available since 5.6. .TP .B IORING_OP_MADVISE Issue the equivalent of a .BR madvise(2) system call. .I addr must contain the address to operate on, .I len must contain the length on which to operate, and .I fadvise_advice must contain the advice associated with the operation. See also .BR madvise(2) for the general description of the related system call. Available since 5.6. .TP .B IORING_OP_OPENAT Issue the equivalent of a .BR openat(2) system call. .I fd is the .I dirfd argument, .I addr must contain a pointer to the .I *pathname argument, .I open_flags should contain any flags passed in, and .I len is access mode of the file. See also .BR openat(2) for the general description of the related system call. Available since 5.6. If the .I file_index field is set to a positive number, the file won't be installed into the normal file table as usual but will be placed into the fixed file table at index .I file_index - 1. In this case, instead of returning a file descriptor, the result will contain either 0 on success or an error. If the index points to a valid empty slot, the installation is guaranteed to not fail. If there is already a file in the slot, it will be replaced, similar to .B IORING_OP_FILES_UPDATE. Please note that only io_uring has access to such files and no other syscall can use them. See .B IOSQE_FIXED_FILE and .B IORING_REGISTER_FILES. Available since 5.15. .TP .B IORING_OP_OPENAT2 Issue the equivalent of a .BR openat2(2) system call. .I fd is the .I dirfd argument, .I addr must contain a pointer to the .I *pathname argument, .I len should contain the size of the open_how structure, and .I off should be set to the address of the open_how structure. See also .BR openat2(2) for the general description of the related system call. Available since 5.6. If the .I file_index field is set to a positive number, the file won't be installed into the normal file table as usual but will be placed into the fixed file table at index .I file_index - 1. In this case, instead of returning a file descriptor, the result will contain either 0 on success or an error. If the index points to a valid empty slot, the installation is guaranteed to not fail. If there is already a file in the slot, it will be replaced, similar to .B IORING_OP_FILES_UPDATE. Please note that only io_uring has access to such files and no other syscall can use them. See .B IOSQE_FIXED_FILE and .B IORING_REGISTER_FILES. Available since 5.15. .TP .B IORING_OP_CLOSE Issue the equivalent of a .BR close(2) system call. .I fd is the file descriptor to be closed. See also .BR close(2) for the general description of the related system call. Available since 5.6. 
If the .I file_index field is set to a positive number, this command can be used to close files that were direct opened through .B IORING_OP_OPENAT , .B IORING_OP_OPENAT2 , or .B IORING_OP_ACCEPT using the io_uring specific direct descriptors. Note that only one of the descriptor fields may be set. The direct close feature is available since the 5.15 kernel, where direct descriptors were introduced. .TP .B IORING_OP_STATX Issue the equivalent of a .BR statx(2) system call. .I fd is the .I dirfd argument, .I addr must contain a pointer to the .I *pathname string, .I statx_flags is the .I flags argument, .I len should be the .I mask argument, and .I off must contain a pointer to the .I statxbuf to be filled in. See also .BR statx(2) for the general description of the related system call. Available since 5.6. .TP .B IORING_OP_READ .TP .B IORING_OP_WRITE Issue the equivalent of a .BR pread(2) or .BR pwrite(2) system call. .I fd is the file descriptor to be operated on, .I addr contains the buffer in question, .I len contains the length of the IO operation, and .I offs contains the read or write offset. If .I fd does not refer to a seekable file, .I off must be set to zero or -1. If .I offs is set to .B -1 , the offset will use (and advance) the file position, like the .BR read(2) and .BR write(2) system calls. These are non-vectored versions of the .B IORING_OP_READV and .B IORING_OP_WRITEV opcodes. See also .BR read(2) and .BR write(2) for the general description of the related system call. Available since 5.6. .TP .B IORING_OP_SPLICE Issue the equivalent of a .BR splice(2) system call. .I splice_fd_in is the file descriptor to read from, .I splice_off_in is an offset to read from, .I fd is the file descriptor to write to, .I off is an offset from which to start writing to. A sentinel value of .B -1 is used to pass the equivalent of a NULL for the offsets to .BR splice(2). .I len contains the number of bytes to copy. .I splice_flags contains a bit mask for the flag field associated with the system call. Please note that one of the file descriptors must refer to a pipe. See also .BR splice(2) for the general description of the related system call. Available since 5.7. .TP .B IORING_OP_TEE Issue the equivalent of a .BR tee(2) system call. .I splice_fd_in is the file descriptor to read from, .I fd is the file descriptor to write to, .I len contains the number of bytes to copy, and .I splice_flags contains a bit mask for the flag field associated with the system call. Please note that both of the file descriptors must refer to a pipe. See also .BR tee(2) for the general description of the related system call. Available since 5.8. .TP .B IORING_OP_FILES_UPDATE This command is an alternative to using .B IORING_REGISTER_FILES_UPDATE which then works in an async fashion, like the rest of the io_uring commands. The arguments passed in are the same. .I addr must contain a pointer to the array of file descriptors, .I len must contain the length of the array, and .I off must contain the offset at which to operate. Note that the array of file descriptors pointed to in .I addr must remain valid until this operation has completed. Available since 5.6. .TP .B IORING_OP_PROVIDE_BUFFERS This command allows an application to register a group of buffers to be used by commands that read/receive data. Using buffers in this manner can eliminate the need to separate the poll + read, which provides a convenient point in time to allocate a buffer for a given request. 
It's often infeasible to have as many buffers available as there are pending reads or receives. With this feature, the application can have its pool of buffers ready in the kernel, and when the file or socket is ready to read/receive data, a buffer can be selected for the operation. .I fd must contain the number of buffers to provide, .I addr must contain the starting address to add buffers from, .I len must contain the length of each buffer to add from the range, .I buf_group must contain the group ID of this range of buffers, and .I off must contain the starting buffer ID of this range of buffers. With that set, the kernel adds buffers starting with the memory address in .I addr, each with a length of .I len. Hence the application should provide .I len * fd worth of memory in .I addr. Buffers are grouped by the group ID, and each buffer within this group will be identical in size according to the above arguments. This allows the application to provide different groups of buffers, and this is often used to have differently sized buffers available depending on what the expectations are of the individual request. When submitting a request that should use a provided buffer, the .B IOSQE_BUFFER_SELECT flag must be set, and .I buf_group must be set to the desired buffer group ID where the buffer should be selected from. Available since 5.7. .TP .B IORING_OP_REMOVE_BUFFERS Remove buffers previously registered with .B IORING_OP_PROVIDE_BUFFERS. .I fd must contain the number of buffers to remove, and .I buf_group must contain the buffer group ID from which to remove the buffers. Available since 5.7. .TP .B IORING_OP_SHUTDOWN Issue the equivalent of a .BR shutdown(2) system call. .I fd is the file descriptor to the socket being shutdown, and .I len must be set to the .I how argument. No other fields should be set. Available since 5.11. .TP .B IORING_OP_RENAMEAT Issue the equivalent of a .BR renameat2(2) system call. .I fd should be set to the .I olddirfd, .I addr should be set to the .I oldpath, .I len should be set to the .I newdirfd, .I addr2 should be set to the .I newpath, and finally .I rename_flags should be set to the .I flags passed in to .BR renameat2(2). Available since 5.11. .TP .B IORING_OP_UNLINKAT Issue the equivalent of an .BR unlinkat(2) system call. .I fd should be set to the .I dirfd, .I addr should be set to the .I pathname, and .I unlink_flags should be set to the .I flags being passed in to .BR unlinkat(2). Available since 5.11. .TP .B IORING_OP_MKDIRAT Issue the equivalent of a .BR mkdirat(2) system call. .I fd should be set to the .I dirfd, .I addr should be set to the .I pathname, and .I len should be set to the .I mode being passed in to .BR mkdirat(2). Available since 5.15. .TP .B IORING_OP_SYMLINKAT Issue the equivalent of a .BR symlinkat(2) system call. .I fd should be set to the .I newdirfd, .I addr should be set to the .I target and .I addr2 should be set to the .I linkpath being passed in to .BR symlinkat(2). Available since 5.15. .TP .B IORING_OP_LINKAT Issue the equivalent of a .BR linkat(2) system call. .I fd should be set to the .I olddirfd, .I addr should be set to the .I oldpath, .I len should be set to the .I newdirfd, .I addr2 should be set to the .I newpath, and .I hardlink_flags should be set to the .I flags being passed in to .BR linkat(2). Available since 5.15. .TP .B IORING_OP_MSG_RING Send a message to an io_uring.
.I fd must be set to a file descriptor of a ring that the application has access to, .I len can be set to any 32-bit value that the application wishes to pass on, and .I off should be set to any 64-bit value that the application wishes to send. On the target ring, a CQE will be posted with the .I res field matching the .I len set, and a .I user_data field matching the .I off value being passed in. This request type can be used to either just wake or interrupt anyone waiting for completions on the target ring, or it can be used to pass messages via the two fields. Available since 5.18. .TP .B IORING_OP_SOCKET Issue the equivalent of a .BR socket(2) system call. .I fd must contain the communication domain, .I off must contain the communication type, .I len must contain the protocol, and .I rw_flags is currently unused and must be set to zero. See also .BR socket(2) for the general description of the related system call. Available since 5.19. If the .I file_index field is set to a positive number, the file won't be installed into the normal file table as usual but will be placed into the fixed file table at index .I file_index - 1. In this case, instead of returning a file descriptor, the result will contain either 0 on success or an error. If the index points to a valid empty slot, the installation is guaranteed to not fail. If there is already a file in the slot, it will be replaced, similar to .B IORING_OP_FILES_UPDATE. Please note that only io_uring has access to such files and no other syscall can use them. See .B IOSQE_FIXED_FILE and .B IORING_REGISTER_FILES. Available since 5.19. .TP .B IORING_OP_SEND_ZC Issue the zerocopy equivalent of a .BR send(2) system call. Similar to IORING_OP_SEND, but tries to avoid making intermediate copies of data. Zerocopy execution is not guaranteed and may fall back to copying. The request may also fail with .B -EOPNOTSUPP , when a protocol doesn't support zerocopy, in which case users are recommended to use copying sends instead. The .I flags field of the first .I "struct io_uring_cqe" may likely contain .B IORING_CQE_F_MORE , which means that there will be a second completion event / notification for the request, with the .I user_data field set to the same value. The user must not modify the data buffer until the notification is posted. The first cqe follows the usual rules and so its .I res field will contain the number of bytes sent or a negative error code. The notification's .I res field will be set to zero and the .I flags field will contain .B IORING_CQE_F_NOTIF . The two-step model is needed because the kernel may hold on to buffers for a long time, e.g. waiting for a TCP ACK, and having a separate cqe for request completions allows userspace to push more data without extra delays. Note that notifications are only responsible for controlling the lifetime of the buffers, and as such don't mean anything about whether the data has actually been sent out or received by the other end. Even errored requests may generate a notification, and the user must check for .B IORING_CQE_F_MORE rather than relying on the result. .I fd must be set to the socket file descriptor, .I addr must contain a pointer to the buffer, .I len denotes the length of the buffer to send, and .I msg_flags holds the flags associated with the system call. When .I addr2 is non-zero it points to the address of the target with .I addr_len specifying its size, turning the request into a .BR sendto(2) system call equivalent. Available since 6.0.
This command also supports the following modifiers in .I ioprio: .PP .in +12 .B IORING_RECVSEND_POLL_FIRST If set, io_uring will assume the socket is currently full and attempting to send data will be unsuccessful. For this case, io_uring will arm internal poll and trigger a send of the data when there is enough space available. This initial send attempt can be wasteful for the case where the socket is expected to be full; setting this flag will bypass the initial send attempt and go straight to arming poll. If poll does indicate that data can be sent, the operation will proceed. .B IORING_RECVSEND_FIXED_BUF If set, instructs io_uring to use a pre-mapped buffer. The .I buf_index field should contain an index into an array of fixed buffers. See .BR io_uring_register (2) for details on how to setup a context for fixed buffer I/O. .in .PP .TP .B IORING_OP_WAITID Issue the equivalent of a .BR waitid(2) system call. .I len must contain the idtype being queried/waited for and .I fd must contain the 'pid' (or id) being waited for. .I file_index is the 'options' being set (the child state changes to wait for). .I addr2 is a pointer to siginfo_t, if any, being filled in. See also .BR waitid(2) for the general description of the related system call. Available since 6.5. .PP The .I flags field is a bit mask. The supported flags are: .TP .B IOSQE_FIXED_FILE When this flag is specified, .I fd is an index into the files array registered with the io_uring instance (see the .B IORING_REGISTER_FILES section of the .BR io_uring_register (2) man page). Note that this isn't available for all commands. If used on a command that doesn't support fixed files, the SQE will error with .B -EBADF. Available since 5.1. .TP .B IOSQE_IO_DRAIN When this flag is specified, the SQE will not be started before previously submitted SQEs have completed, and new SQEs will not be started before this one completes. Available since 5.2. .TP .B IOSQE_IO_LINK When this flag is specified, the SQE forms a link with the next SQE in the submission ring. That next SQE will not be started before the previous request completes. This, in effect, forms a chain of SQEs, which can be arbitrarily long. The tail of the chain is denoted by the first SQE that does not have this flag set. Chains are not supported across submission boundaries. Even if the last SQE in a submission has this flag set, it will still terminate the current chain. This flag has no effect on previous SQE submissions, nor does it impact SQEs that are outside of the chain tail. This means that multiple chains can be executing in parallel, or chains and individual SQEs. Only members inside the chain are serialized. A chain of SQEs will be broken if any request in that chain ends in error. io_uring considers any unexpected result an error. This means that, e.g., a short read will also terminate the remainder of the chain. If a chain of SQE links is broken, the remaining unstarted part of the chain will be terminated and completed with .B -ECANCELED as the error code. Available since 5.3. .TP .B IOSQE_IO_HARDLINK Like IOSQE_IO_LINK, but it doesn't sever regardless of the completion result. Note that the link will still sever if we fail submitting the parent request; hard links are only resilient in the presence of completion results for requests that did submit correctly. IOSQE_IO_HARDLINK implies IOSQE_IO_LINK. Available since 5.5. 
.TP .B IOSQE_ASYNC Normal operation for io_uring is to try and issue an sqe as non-blocking first, and if that fails, execute it in an async manner. To support more efficient overlapped operation of requests that the application knows/assumes will always (or most of the time) block, the application can ask for an sqe to be issued async from the start. Available since 5.6. .TP .B IOSQE_BUFFER_SELECT Used in conjunction with the .B IORING_OP_PROVIDE_BUFFERS command, which registers a pool of buffers to be used by commands that read or receive data. When buffers are registered for this use case, and this flag is set in the command, io_uring will grab a buffer from this pool when the request is ready to receive or read data. If successful, the resulting CQE will have .B IORING_CQE_F_BUFFER set in the flags part of the struct, and the upper .B IORING_CQE_BUFFER_SHIFT bits will contain the ID of the selected buffer. This allows the application to know exactly which buffer was selected for the operation. If no buffers are available and this flag is set, then the request will fail with .B -ENOBUFS as the error code. Once a buffer has been used, it is no longer available in the kernel pool. The application must re-register the given buffer when it is ready to recycle it (e.g. has completed using it). Available since 5.7. .TP .B IOSQE_CQE_SKIP_SUCCESS Don't generate a CQE if the request completes successfully. If the request fails, an appropriate CQE will be posted as usual and, if there is no .B IOSQE_IO_HARDLINK, CQEs for all linked requests will be omitted. The notion of failure/success is opcode specific and is the same as with breaking chains of .B IOSQE_IO_LINK. One special case is when the request has a linked timeout; then the CQE generation for the linked timeout is decided solely by whether it has .B IOSQE_CQE_SKIP_SUCCESS set, regardless of whether it timed out or was canceled. In other words, if a linked timeout has the flag set, it's guaranteed to not post a CQE. The semantics are chosen to accommodate several use cases. First, when all but the last request of a normal link without linked timeouts are marked with the flag, only one CQE per link is posted. Additionally, it enables suppression of CQEs in cases where the side effects of a successfully executed operation are enough for userspace to know the state of the system. One such example would be writing to a synchronisation file. This flag is incompatible with .B IOSQE_IO_DRAIN. Using both of them in a single ring is undefined behavior, even when they are not used together in a single request. Currently, after the first request with .B IOSQE_CQE_SKIP_SUCCESS, all subsequent requests marked with drain will be failed at submission time. Note that the error reporting is best effort only, and restrictions may change in the future. Available since 5.17. .PP .I ioprio specifies the I/O priority. See .BR ioprio_get (2) for a description of Linux I/O priorities. .I fd specifies the file descriptor against which the operation will be performed, with the exception noted above. If the operation is one of .B IORING_OP_READ_FIXED or .BR IORING_OP_WRITE_FIXED , .I addr and .I len must fall within the buffer located at .I buf_index in the fixed buffer array. If the operation is either .B IORING_OP_READV or .BR IORING_OP_WRITEV , then .I addr points to an iovec array of .I len entries. .IR rw_flags , specified for read and write operations, contains a bitwise OR of per-I/O flags, as described in the .BR preadv2 (2) man page. 
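.PP
As a rough illustration of how the fields described above fit together, the following minimal sketch fills out a submission queue entry for
.B IORING_OP_READV
by hand. It assumes
.I sqe
points to an otherwise unused entry, that
.I fd
is an open file, and that
.I iovecs
is an array of
.I nr_vecs
struct iovec (all names here are illustrative); applications using liburing would normally rely on the prep helpers instead:
.PP
.in +4n
.EX
memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_READV;
sqe->fd = fd;                       /* file to read from */
sqe->addr = (unsigned long) iovecs; /* pointer to iovec array */
sqe->len = nr_vecs;                 /* number of iovec entries */
sqe->off = 0;                       /* file offset to read at */
sqe->user_data = 0x1234;            /* copied back in the CQE */
.EE
.in
.PP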
The .I fsync_flags bit mask may contain either 0, for a normal file integrity sync, or .B IORING_FSYNC_DATASYNC to provide data sync only semantics. See the descriptions of .B O_SYNC and .B O_DSYNC in the .BR open (2) manual page for more information. The bits that may be set in .I poll_events are defined in \fI<poll.h>\fP, and documented in .BR poll (2). .I user_data is an application-supplied value that will be copied into the completion queue entry (see below). .I buf_index is an index into an array of fixed buffers, and is only valid if fixed buffers were registered. .I personality is the credentials ID to use for this operation. See .BR io_uring_register (2) for how to register personalities with io_uring. If set to 0, the current personality of the submitting task is used. .PP Once the submission queue entry is initialized, I/O is submitted by placing the index of the submission queue entry into the tail of the submission queue. After one or more indexes are added to the queue, and the queue tail is advanced, the .BR io_uring_enter (2) system call can be invoked to initiate the I/O. Completions use the following data structure: .PP .in +4n .EX /* * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { __u64 user_data; /* sqe->data submission passed back */ __s32 res; /* result code for this event */ __u32 flags; }; .EE .in .PP .I user_data is copied from the field of the same name in the submission queue entry. The primary use case is to store data that the application will need to access upon completion of this particular I/O. The .I flags field is used for certain commands, like .B IORING_OP_POLL_ADD or in conjunction with .B IOSQE_BUFFER_SELECT or .B IORING_OP_MSG_RING; see those entries for details. .I res is the operation-specific result, but io_uring-specific errors (e.g. flags or opcode invalid) are returned through this field. They are described in section .B CQE ERRORS. .PP For read and write opcodes, the return values match .I errno values documented in the .BR preadv2 (2) and .BR pwritev2 (2) man pages, with .I res holding the equivalent of .I -errno for error cases, or the transferred number of bytes in case the operation is successful. Hence both error and success return can be found in that field in the CQE. For other request types, the return values are documented in the matching man page for that type, or in the opcodes section above for io_uring-specific opcodes. .PP .SH RETURN VALUE .BR io_uring_enter (2) returns the number of I/Os successfully consumed. This can be zero if .I to_submit was zero or if the submission queue was empty. Note that if the ring was created with .B IORING_SETUP_SQPOLL specified, then the return value will generally be the same as .I to_submit as submission happens outside the context of the system call. The errors related to a submission queue entry will be returned through a completion queue entry (see section .B CQE ERRORS), rather than through the system call itself. Errors that occur not on behalf of a submission queue entry are returned via the system call directly. On such an error, a negative error code is returned. The caller should not rely on the .I errno variable. .PP .SH ERRORS These are the errors returned by the .BR io_uring_enter (2) system call. .TP .B EAGAIN The kernel was unable to allocate memory for the request, or otherwise ran out of resources to handle it. The application should wait for some completions and try again. .TP .B EBADF .I fd is not a valid file descriptor. 
.TP .B EBADFD .I fd is a valid file descriptor, but the io_uring ring is not in the right state (enabled). See .BR io_uring_register (2) for details on how to enable the ring. .TP .B EBADR At least one CQE was dropped even with the .B IORING_FEAT_NODROP feature, and there are no otherwise available CQEs. This clears the error state and so with no other changes the next call to .BR io_uring_enter (2) will not have this error. This error should be extremely rare and indicates the machine is running critically low on memory. It may be reasonable for the application to terminate unless it is able to safely handle any CQE being lost. .TP .B EBUSY If the .B IORING_FEAT_NODROP feature flag is set, then .B EBUSY will be returned if there were overflow entries, the .B IORING_ENTER_GETEVENTS flag is set, and not all of the overflow entries could be flushed to the CQ ring. Without .B IORING_FEAT_NODROP the application is attempting to overcommit the number of requests it can have pending. The application should wait for some completions and try again. May occur if the application tries to queue more requests than we have room for in the CQ ring, or if the application attempts to wait for more events without having reaped the ones already present in the CQ ring. .TP .B EEXIST The thread submitting the work is invalid. .TP .B EINVAL Some bits in the .I flags argument are invalid. .TP .B EFAULT An invalid user space address was specified for the .I sig argument. .TP .B ENXIO The io_uring instance is in the process of being torn down. .TP .B EOPNOTSUPP .I fd does not refer to an io_uring instance. .TP .B EINTR The operation was interrupted by a delivery of a signal before it could complete; see .BR signal (7). Can happen while waiting for events with .B IORING_ENTER_GETEVENTS. .SH CQE ERRORS These io_uring-specific errors are returned as a negative value in the .I res field of the completion queue entry. .TP .B EACCES The .I flags field or .I opcode in a submission queue entry is not allowed due to registered restrictions. See .BR io_uring_register (2) for details on how restrictions work. .TP .B EBADF The .I fd field in the submission queue entry is invalid, or the .B IOSQE_FIXED_FILE flag was set in the submission queue entry, but no files were registered with the io_uring instance. .TP .B EFAULT The buffer is outside of the process' accessible address space. .TP .B EFAULT .B IORING_OP_READ_FIXED or .B IORING_OP_WRITE_FIXED was specified in the .I opcode field of the submission queue entry, but either buffers were not registered for this io_uring instance, or the address range described by .I addr and .I len does not fit within the buffer registered at .IR buf_index . .TP .B EINVAL The .I flags field or .I opcode in a submission queue entry is invalid. .TP .B EINVAL The .I buf_index member of the submission queue entry is invalid. .TP .B EINVAL The .I personality field in a submission queue entry is invalid. .TP .B EINVAL .B IORING_OP_NOP was specified in the submission queue entry, but the io_uring context was setup for polling .RB ( IORING_SETUP_IOPOLL was specified in the call to io_uring_setup). .TP .B EINVAL .B IORING_OP_READV or .B IORING_OP_WRITEV was specified in the submission queue entry, but the io_uring instance has fixed buffers registered. .TP .B EINVAL .B IORING_OP_READ_FIXED or .B IORING_OP_WRITE_FIXED was specified in the submission queue entry, and the .I buf_index is invalid. 
.TP .B EINVAL .BR IORING_OP_READV , .BR IORING_OP_WRITEV , .BR IORING_OP_READ_FIXED , .B IORING_OP_WRITE_FIXED or .B IORING_OP_FSYNC was specified in the submission queue entry, but the io_uring instance was configured for IOPOLLing, or any of .IR addr , .IR ioprio , .IR off , .IR len , or .I buf_index was set in the submission queue entry. .TP .B EINVAL .B IORING_OP_POLL_ADD or .B IORING_OP_POLL_REMOVE was specified in the .I opcode field of the submission queue entry, but the io_uring instance was configured for busy-wait polling .RB ( IORING_SETUP_IOPOLL ), or any of .IR ioprio , .IR off , .IR len , or .I buf_index was non-zero in the submission queue entry. .TP .B EINVAL .B IORING_OP_POLL_ADD was specified in the .I opcode field of the submission queue entry, and the .I addr field was non-zero. .TP .B EOPNOTSUPP .I opcode is valid, but not supported by this kernel. .TP .B EOPNOTSUPP .B IOSQE_BUFFER_SELECT was set in the .I flags field of the submission queue entry, but the .I opcode doesn't support buffer selection. liburing-2.6/man/io_uring_enter2.2000077700000000000000000000000001461424365000222172io_uring_enter.2ustar00rootroot00000000000000liburing-2.6/man/io_uring_for_each_cqe.3000066400000000000000000000030411461424365000203270ustar00rootroot00000000000000.\" Copyright (C) 2023 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_for_each_cqe 3 "June 04, 2023" "liburing-2.4" "liburing Manual" .SH NAME io_uring_for_each_cqe \- iterate pending completion events .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "io_uring_for_each_cqe(struct io_uring *" ring "," .BI " unsigned " head "," .BI " struct io_uring_cqe *" cqe ") { }" .fi .SH DESCRIPTION .PP The .BR io_uring_for_each_cqe (3) is a macro helper that iterates completion events belonging to the .I ring using .I head as a temporary iterator, and points .I cqe to each pending event when iterating. This helper provides an efficient way to iterate all pending events in the ring, and then advance the CQ ring by calling .BR io_uring_cq_advance (3) with the number of CQEs consumed when done. As updating the kernel visible CQ ring state involves an ordered write, doing it once for a number of events is more efficient than handling each completion separately and calling .BR io_uring_cqe_seen (3) for each of them. .SH EXAMPLE .EX void handle_cqes(struct io_uring *ring) { struct io_uring_cqe *cqe; unsigned head; unsigned i = 0; io_uring_for_each_cqe(ring, head, cqe) { /* handle completion */ printf("cqe: %d\\n", cqe->res); i++; } io_uring_cq_advance(ring, i); } .EE .SH RETURN VALUE None .SH SEE ALSO .BR io_uring_wait_cqe_timeout (3), .BR io_uring_wait_cqe (3), .BR io_uring_wait_cqes (3), .BR io_uring_cqe_seen (3), .BR io_uring_buf_ring_cq_advance (3) liburing-2.6/man/io_uring_free_buf_ring.3000066400000000000000000000026771461424365000205370ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_free_buf_ring 3 "Mar 07, 2023" "liburing-2.4" "liburing Manual" .SH NAME io_uring_free_buf_ring \- unregister and free a buffer ring for provided buffers .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_free_buf_ring(struct io_uring *" ring "," .BI " struct io_uring_buf_ring *" br "," .BI " unsigned int " nentries "," .BI " int " bgid ");" .fi .SH DESCRIPTION .PP The .BR io_uring_free_buf_ring (3) function unregisters a previously registered shared buffer ring. The ring must have been previously returned from .BR io_uring_setup_buf_ring (3) . 
The .I ring argument must point to the ring for which the provided buffer ring is being unregistered, .I br must point to a buffer ring previously returned by .BR io_uring_setup_buf_ring (3) , .I nentries is the number of entries requested in the buffer ring, and .I bgid is the buffer group ID that .I br was setup with. Under the covers, this function uses .BR io_uring_unregister_buf_ring (3) to unregister the ring, and handles the freeing of the ring rather than letting the application open code it. Available since 5.19. .SH RETURN VALUE On success .BR io_uring_free_buf_ring (3) returns 0. On failure it returns .BR -errno . .SH SEE ALSO .BR io_uring_setup_buf_ring (3) liburing-2.6/man/io_uring_free_probe.3000066400000000000000000000010571461424365000200460ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_free_probe 3 "January 25, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_free_probe \- free probe instance .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_free_probe(struct io_uring_probe *" probe ");" .fi .SH DESCRIPTION .PP The function .BR io_uring_free_probe (3) frees the .I probe instance allocated with the .BR io_uring_get_probe (3) function. .SH RETURN VALUE None .SH SEE ALSO .BR io_uring_get_probe (3) liburing-2.6/man/io_uring_get_events.3000066400000000000000000000016471461424365000201060ustar00rootroot00000000000000.\" Copyright (C) 2022 Dylan Yudaken .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_get_events 3 "September 5, 2022" "liburing-2.3" "liburing Manual" .SH NAME io_uring_get_events \- Flush outstanding requests to CQE ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_get_events(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_get_events (3) function runs outstanding work and flushes completion events to the CQE ring. There can be events needing to be flushed if the ring was full and had overflowed. Alternatively, if the ring was setup with the .BR IORING_SETUP_DEFER_TASKRUN flag, then this will process outstanding tasks, possibly resulting in more CQEs. .SH RETURN VALUE On success .BR io_uring_get_events (3) returns 0. On failure it returns .BR -errno . .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit_and_get_events (3), .BR io_uring_cq_has_overflow (3) liburing-2.6/man/io_uring_get_probe.3000066400000000000000000000015041461424365000177010ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_get_probe 3 "January 25, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_get_probe \- get probe instance .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "struct io_uring_probe *io_uring_get_probe(void);" .fi .SH DESCRIPTION .PP The function .BR io_uring_get_probe (3) returns an allocated io_uring_probe structure to the caller. The caller is responsible for freeing the structure with the function .BR io_uring_free_probe (3). .SH NOTES Earlier versions of the Linux kernel do not support probe. If the kernel doesn't support probe, this function will return NULL. .SH RETURN VALUE On success it returns an allocated io_uring_probe structure, otherwise it returns NULL. .SH SEE ALSO .BR io_uring_free_probe (3) liburing-2.6/man/io_uring_get_sqe.3000066400000000000000000000030741461424365000173660ustar00rootroot00000000000000.\" Copyright (C) 2020 Jens Axboe .\" Copyright (C) 2020 Red Hat, Inc. 
.\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_get_sqe 3 "July 10, 2020" "liburing-0.7" "liburing Manual" .SH NAME io_uring_get_sqe \- get the next available submission queue entry from the submission queue .SH SYNOPSIS .nf .B #include .PP .BI "struct io_uring_sqe *io_uring_get_sqe(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_get_sqe (3) function gets the next available submission queue entry from the submission queue belonging to the .I ring param. On success .BR io_uring_get_sqe (3) returns a pointer to the submission queue entry. On failure NULL is returned. If a submission queue entry is returned, it should be filled out via one of the prep functions such as .BR io_uring_prep_read (3) and submitted via .BR io_uring_submit (3). Note that neither .BR io_uring_get_sqe nor the prep functions set (or clear) the .B user_data field of the SQE. If the caller expects .BR io_uring_cqe_get_data (3) or .BR io_uring_cqe_get_data64 (3) to return valid data when reaping IO completions, either .BR io_uring_sqe_set_data (3) or .BR io_uring_sqe_set_data64 (3) .B MUST have been called before submitting the request. .SH RETURN VALUE .BR io_uring_get_sqe (3) returns a pointer to the next submission queue event on success and NULL on failure. If NULL is returned, the SQ ring is currently full and entries must be submitted for processing before new ones can get allocated. .SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_sqe_set_data (3) liburing-2.6/man/io_uring_major_version.3000077700000000000000000000000001461424365000254042io_uring_check_version.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_minor_version.3000077700000000000000000000000001461424365000254202io_uring_check_version.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_opcode_supported.3000066400000000000000000000014531461424365000213140ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_opcode_supported 3 "January 25, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_opcode_supported \- is op code supported? .SH SYNOPSIS .nf .B #include .PP .BI "int io_uring_opcode_supported(struct io_uring_probe *" probe "," .BI " int " opcode ");" .fi .SH DESCRIPTION .PP The function .BR io_uring_opcode_supported (3) allows the caller to determine if the passed in .I opcode belonging to the .I probe param is supported. An instance of the io_uring_probe instance can be obtained by calling the function .BR io_uring_get_probe (3). .SH RETURN VALUE On success it returns 1, otherwise it returns 0. .SH SEE ALSO .BR io_uring_get_probe (3) liburing-2.6/man/io_uring_peek_cqe.3000066400000000000000000000020071461424365000175060ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_peek_cqe 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_peek_cqe \- check if an io_uring completion event is available .SH SYNOPSIS .nf .B #include .PP .BI "int io_uring_peek_cqe(struct io_uring *" ring "," .BI " struct io_uring_cqe **" cqe_ptr ");" .fi .SH DESCRIPTION .PP The .BR io_uring_peek_cqe (3) function returns an IO completion from the queue belonging to the .I ring param, if one is readily available. On successful return, .I cqe_ptr param is filled with a valid CQE entry. This function does not enter the kernel to wait for an event, an event is only returned if it's already available in the CQ ring. 
.SH RETURN VALUE On success .BR io_uring_peek_cqe (3) returns .B 0 and the cqe_ptr parameter is filled in. On failure it returns .BR -EAGAIN . .SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_wait_cqes (3), .BR io_uring_wait_cqe (3) liburing-2.6/man/io_uring_prep_accept.3000066400000000000000000000151471461424365000202300ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_accept 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_accept \- prepare an accept request .SH SYNOPSIS .nf .B #include <sys/socket.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_accept(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " struct sockaddr *" addr "," .BI " socklen_t *" addrlen "," .BI " int " flags ");" .PP .BI "void io_uring_prep_accept_direct(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " struct sockaddr *" addr "," .BI " socklen_t *" addrlen "," .BI " int " flags "," .BI " unsigned int " file_index ");" .PP .BI "void io_uring_prep_multishot_accept(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " struct sockaddr *" addr "," .BI " socklen_t *" addrlen "," .BI " int " flags ");" .PP .BI "void io_uring_prep_multishot_accept_direct(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " struct sockaddr *" addr "," .BI " socklen_t *" addrlen "," .BI " int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_accept (3) function and its three variants prepare an accept request similar to .BR accept4 (2). The submission queue entry .I sqe is setup to use the file descriptor .I sockfd to start accepting a connection request described by the socket address at .I addr and of structure length .I addrlen and using modifier flags in .IR flags . The three variants allow combining the direct file table and multishot features. Direct descriptors are io_uring private file descriptors. They avoid some of the overhead associated with thread shared file tables and can be used in any io_uring request that takes a file descriptor. The two direct variants here create such direct descriptors. Subsequent to their creation, they can be used by setting .B IOSQE_FIXED_FILE in the SQE .I flags member, and setting the SQE .I fd field to the direct descriptor value rather than the regular file descriptor. Direct descriptors are managed like registered files. To use an accept direct variant, the application must first have registered a file table of a desired size using .BR io_uring_register_files (3) or .BR io_uring_register_files_sparse (3). Once registered, .BR io_uring_prep_accept_direct (3) allows an entry in that table to be specifically selected through the .I file_index argument. If the specified entry already contains a file, the file will first be removed from the table and closed, consistent with the behavior of updating an existing file with .BR io_uring_register_files_update (3). .I file_index can also be set to .B IORING_FILE_INDEX_ALLOC for this variant and an unused table index will be dynamically chosen and returned. Likewise, .B io_uring_prep_multishot_accept_direct will have an unused table index dynamically chosen and returned for each connection accepted. If both forms of direct selection will be employed, specific and dynamic, see .BR io_uring_register_file_alloc_range (3) for setting up the table so dynamically chosen entries are made against a different range than that targeted by specific requests. 
Note that old kernels don't check the SQE .I file_index field, meaning applications cannot rely on a .B -EINVAL CQE .I res being returned when the kernel is too old, because older kernels may not recognize they are being asked to use a direct table slot. When a direct descriptor accept request asks for a table slot to be dynamically chosen but there are no free entries, .B -ENFILE is returned as the CQE .IR res . The multishot variants allow an application to issue a single accept request, which will repeatedly trigger a CQE when a connection request comes in. Like other multishot type requests, the application should look at the CQE .I flags and see if .B IORING_CQE_F_MORE is set on completion as an indication of whether or not the accept request will generate further CQEs. Note that for the multishot variants, setting .B addr and .B addrlen may not make a lot of sense, as the same value would be used for every accepted connection. This means that the data written to .B addr may be overwritten by a new connection before the application has had time to process a past connection. If the application knows that a new connection cannot come in before a previous one has been processed, it may be used as expected. The multishot variants are available since 5.19. See the man page .BR accept4 (2) for details of the accept function itself. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. .BR io_uring_prep_accept (3) generates the installed file descriptor as its result. .BR io_uring_prep_accept_direct (3) and .I file_index set to a specific direct descriptor generates .B 0 on success. The caller must remember which direct descriptor was picked for this request. .BR io_uring_prep_accept_direct (3) and .I file_index set to .B IORING_FILE_INDEX_ALLOC generates the dynamically chosen direct descriptor. .BR io_uring_prep_multishot_accept (3) generates the installed file descriptor in each result. .BR io_uring_prep_multishot_accept_direct (3) generates the dynamically chosen direct descriptor in each result. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it generates the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). 
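.SH EXAMPLE
A minimal sketch, assuming
.I listen_fd
is an already listening socket, that arms a multishot accept and tags it with a caller-chosen
.I user_data
value so its completions can be recognized when reaped; the helper name and tag value are illustrative, and the peer address is not collected here:
.PP
.EX
void arm_multishot_accept(struct io_uring *ring, int listen_fd)
{
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(ring);
	/* NULL addr/addrlen: don't gather the peer address */
	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
	io_uring_sqe_set_data64(sqe, 0xfeed);
	io_uring_submit(ring);
}
.EE
Each CQE with a positive
.I res
carries a new connection's file descriptor. If
.B IORING_CQE_F_MORE
is cleared in a CQE, the multishot accept has terminated and must be re-armed.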
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register_files (3), .BR io_uring_register_files_sparse (3), .BR io_uring_register_file_alloc_range (3), .BR io_uring_register (2), .BR accept4 (2) liburing-2.6/man/io_uring_prep_accept_direct.3000077700000000000000000000000001461424365000260112io_uring_prep_accept.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_cancel.3000066400000000000000000000077251461424365000202170ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_cancel 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_cancel \- prepare a cancelation request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_cancel64(struct io_uring_sqe *" sqe "," .BI " __u64 " user_data "," .BI " int " flags ");" .PP .BI "void io_uring_prep_cancel(struct io_uring_sqe *" sqe "," .BI " void *" user_data "," .BI " int " flags ");" .PP .BI "void io_uring_prep_cancel_fd(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_cancel (3) function prepares a cancelation request. The submission queue entry .I sqe is prepared to cancel an existing request identified by .IR user_data . For the .I flags argument, see below. .BR io_uring_prep_cancel64 (3) is identical to .BR io_uring_prep_cancel (3) , except it takes a 64-bit integer rather than a pointer type. The cancelation request will attempt to find the previously issued request identified by .I user_data and cancel it. The identifier is what the previously issued request has in its .I user_data field in the SQE. The .BR io_uring_prep_cancel_fd (3) function prepares a cancelation request. The submission queue entry .I sqe is prepared to cancel an existing request that used the file descriptor .IR fd . For the .I flags argument, see below. The cancelation request will attempt to find the previously issued request that used .I fd as the file descriptor and cancel it. By default, the first request matching the criteria given will be canceled. This can be modified with any of the following flags passed in: .TP .B IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given criteria, rather than just canceling the first one found. Available since 5.19. .TP .B IORING_ASYNC_CANCEL_FD Match based on the file descriptor used in the original request rather than the user_data. This is what .BR io_uring_prep_cancel_fd (3) sets up. Available since 5.19. .TP .B IORING_ASYNC_CANCEL_FD_FIXED Set in conjunction with .B IORING_ASYNC_CANCEL_FD , indicating that the file descriptor given is a direct descriptor rather than a normal file descriptor. Available since 6.0. .TP .B IORING_ASYNC_CANCEL_ANY Match any request in the ring, regardless of user_data or file descriptor. Can be used to cancel any pending request in the ring. Available since 5.19. .P .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. If no flags are used to cancel multiple requests, .B 0 is returned on success. If flags are used to match multiple requests, then a positive value is returned indicating how many requests were found and canceled. .TP .B -ENOENT The request identified by .I user_data could not be located. This could be because it completed before the cancelation request was issued, or if an invalid identifier is used. .TP .B -EINVAL One of the fields set in the SQE was invalid. 
.TP .B -EALREADY The execution state of the request has progressed far enough that cancelation is no longer possible. This should normally mean that it will complete shortly, either successfully, or interrupted due to the cancelation. .SH NOTES Although the cancelation request uses async request syntax, the kernel side of the cancelation is always run synchronously. It is guaranteed that a CQE is always generated by the time the cancel request has been submitted. If the cancelation is successful, the completion for the request targeted for cancelation will have been posted by the time submission returns. For .B -EALREADY it may take a bit of time to do so. For this case, the caller must wait for the canceled request to post its completion event. .SH SEE ALSO .BR io_uring_prep_poll_remove (3), .BR io_uring_get_sqe (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_prep_cancel64.3000077700000000000000000000000001461424365000246052io_uring_prep_cancel.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_cancel_fd.3000077700000000000000000000000001461424365000251042io_uring_prep_cancel.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_close.3000066400000000000000000000027121461424365000200700ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_close 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_close \- prepare a file descriptor close request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_close(struct io_uring_sqe *" sqe "," .BI " int " fd ");" .PP .BI "void io_uring_prep_close_direct(struct io_uring_sqe *" sqe "," .BI " unsigned " file_index ");" .PP .fi .SH DESCRIPTION .PP The .BR io_uring_prep_close (3) function prepares a close request. The submission queue entry .I sqe is setup to close the file descriptor indicated by .IR fd . For a direct descriptor close request, the offset is specified by the .I file_index argument instead of the .IR fd . This is identical to unregistering the direct descriptor, and is provided as a convenience. These functions prepare an async .BR close (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR close (2) liburing-2.6/man/io_uring_prep_close_direct.3000077700000000000000000000000001461424365000255252io_uring_prep_close.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_cmd.3000066400000000000000000000054731461424365000175330ustar00rootroot00000000000000.\" Copyright (C) 2023 Breno Leitao .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_cmd 3 "July 27, 2023" "liburing-2.5" "liburing Manual" .SH NAME io_uring_prep_cmd_sock \- prepare a command request for a socket .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_cmd_sock(struct io_uring_sqe *" sqe "," .BI " int " cmd_op "," .BI " int " fd "," .BI " int " level "," .BI " int " optname "," .BI " void " *optval "," .BI " int " optlen ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_cmd_sock (3) function prepares a command request for a socket. 
The submission queue entry .I sqe is setup to use the socket file descriptor pointed to by .I fd to start a command operation defined by .I cmd_op. This is a generic function, and each command has its own individual .I level, optname, optval values. The .I optlen argument defines the size of the memory pointed to by .I optval. .SH Available commands .TP .B SOCKET_URING_OP_SIOCINQ Returns the amount of queued unread data in the receive buffer. The socket must not be in LISTEN state, otherwise an error .B -EINVAL is returned in the CQE .I res field. The following arguments are not used for this command: .I level, optname, optval and .I optlen. A negative return value means an error. For more information about this command, please check .BR unix (7). .TP .B SOCKET_URING_OP_SIOCOUTQ Returns the amount of unsent data in the socket send queue. The socket must not be in LISTEN state, otherwise an error .B -EINVAL is returned in the CQE .I res field. The following arguments are not used for this command: .I level, optname, optval and .I optlen. A negative return value means an error. For more information about this command, please check .BR unix (7). .TP .B SOCKET_URING_OP_GETSOCKOPT Command to get options for the socket referred to by the socket file descriptor .I fd. The arguments are similar to the .BR getsockopt (2) system call. The .B SOCKET_URING_OP_GETSOCKOPT command is limited to the .B SOL_SOCKET .I level. Unlike the .BR getsockopt (2) system call, the updated .I optlen value is returned in the CQE .I res field on success. On failure, the CQE .I res contains a negative error number. .TP .B SOCKET_URING_OP_SETSOCKOPT Command to set options for the socket referred to by the socket file descriptor .I fd. The arguments are similar to the .BR setsockopt (2) system call. .SH NOTES The memory block pointed to by .I optval needs to be valid/live until the CQE returns. .SH RETURN VALUE Dependent on the command. .SH ERRORS The CQE .I res field will contain the result of the operation. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2), .BR unix (7) liburing-2.6/man/io_uring_prep_connect.3000066400000000000000000000035651461424365000204230ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_connect 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_connect \- prepare a connect request .SH SYNOPSIS .nf .B #include <sys/types.h> .B #include <sys/socket.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_connect(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " const struct sockaddr *" addr "," .BI " socklen_t " addrlen ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_connect (3) function prepares a connect request. The submission queue entry .I sqe is setup to use the file descriptor .I sockfd to start connecting to the destination described by the socket address at .I addr and of structure length .IR addrlen . This function prepares an async .BR connect (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. 
It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR connect (2) liburing-2.6/man/io_uring_prep_fadvise.3000066400000000000000000000026611461424365000204070ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_fadvise 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_fadvise \- prepare a fadvise request .SH SYNOPSIS .nf .B #include <fcntl.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_fadvise(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " __u64 " offset "," .BI " off_t " len "," .BI " int " advice ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_fadvise (3) function prepares an fadvise request. The submission queue entry .I sqe is setup to use the file descriptor pointed to by .I fd to start an fadvise operation at .I offset and of .I len length in bytes, giving it the advice located in .IR advice . This function prepares an async .BR posix_fadvise (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2), .BR posix_fadvise (2) liburing-2.6/man/io_uring_prep_fallocate.3000066400000000000000000000026141461424365000207160ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_fallocate 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_fallocate \- prepare a fallocate request .SH SYNOPSIS .nf .B #include <fcntl.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_fallocate(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " int " mode "," .BI " __u64 " offset "," .BI " __u64 " len ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_fallocate (3) function prepares a fallocate request. The submission queue entry .I sqe is setup to use the file descriptor pointed to by .I fd to start a fallocate operation described by .I mode at offset .I offset and .I len length in bytes. This function prepares an async .BR fallocate (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. 
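.SH EXAMPLE
A minimal sketch (the helper name is illustrative) that preallocates 1 MiB of space for an open file using the default mode of 0; the completion should be reaped as usual afterwards:
.PP
.EX
void preallocate_1mb(struct io_uring *ring, int fd)
{
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(ring);
	/* mode 0: allocate the byte range, extending the file if needed */
	io_uring_prep_fallocate(sqe, fd, 0, 0, 1024 * 1024);
	io_uring_submit(ring);
}
.EE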
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR fallocate (2) liburing-2.6/man/io_uring_prep_fgetxattr.3000077700000000000000000000000001461424365000256332io_uring_prep_getxattr.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_files_update.3000066400000000000000000000052631461424365000214330ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_files_update 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_files_update \- prepare a registered file update request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_files_update(struct io_uring_sqe *" sqe "," .BI " int *" fds "," .BI " unsigned " nr_fds "," .BI " int " offset ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_files_update (3) function prepares a request for updating a number of previously registered file descriptors. The submission queue entry .I sqe is setup to use the file descriptor array pointed to by .I fds and of .I nr_fds in length to update that amount of previously registered files starting at offset .IR offset . Once a previously registered file is updated with a new one, the existing entry is updated and then removed from the table. This operation is equivalent to first unregistering that entry and then inserting a new one, just bundled into one combined operation. If .I offset is specified as IORING_FILE_INDEX_ALLOC, io_uring will allocate free direct descriptors instead of having the application pass them in, and store the allocated direct descriptors into the .I fds array; .I cqe->res will return the number of direct descriptors allocated. .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. On success, .I res will contain the number of successfully updated file descriptors. On error, the following errors can occur. .TP .B -ENOMEM The kernel was unable to allocate memory for the request. .TP .B -EINVAL One of the fields set in the SQE was invalid. .TP .B -EFAULT The kernel was unable to copy in the memory pointed to by .IR fds . .TP .B -EBADF One of the descriptors located in .I fds didn't refer to a valid file descriptor, or one of the file descriptors in the array referred to an io_uring instance. .TP .B -EOVERFLOW The product of .I offset and .I nr_fds exceeds the valid amount or overflows. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). 
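.SH EXAMPLE
A minimal sketch (the helper name and slot number are illustrative) that replaces the registered file at table slot 2 with
.I new_fd
and waits for the result; the
.I fds
array stays in scope until the request has been submitted, as required:
.PP
.EX
int update_slot_two(struct io_uring *ring, int new_fd)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fds[1] = { new_fd };
	int ret;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_files_update(sqe, fds, 1, 2);
	io_uring_submit(ring);
	io_uring_wait_cqe(ring, &cqe);
	ret = cqe->res; /* 1 on success: one file was updated */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}
.EE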
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2) liburing-2.6/man/io_uring_prep_fixed_fd_install.3000066400000000000000000000047541461424365000222670ustar00rootroot00000000000000.\" Copyright (C) 2023 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_fixed_fd_install 3 "December 8, 2023" "liburing-2.6" "liburing Manual" .SH NAME io_uring_prep_fixed_fd_install \- prepare fixed file fd installation request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_fixed_fd_install(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " unsigned int " file_flags "," .BI " unsigned int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_fixed_fd_install (3) helper prepares a fixed file descriptor installation. The submission queue entry .I sqe is setup to install the direct/fixed file descriptor .I fd into the normal process file table, with the file installation flags specified in .I file_flags and the request specific flags in .IR flags . One use case of direct/fixed file descriptors is to turn a regular file descriptor into a direct one, reducing the overhead of any request that needs to access this file. This helper provides a way to go the other way, turning a direct descriptor into a regular file descriptor that can then subsequently be used by regular system calls that take a normal file descriptor. This can be handy if no regular file descriptor exists for this direct descriptor, either because it was instantiated directly as a fixed descriptor, or because the regular file was closed with .BR close (2) after being turned into a direct descriptor. Upon successful return of this request, both a normal and fixed file descriptor exist for the same file. Either one of them may be used to access the file. Either one of them may be closed without affecting the other one. .I file_flags may be either zero, or set to .B O_CLOEXEC to indicate that the new regular file descriptor should be closed during exec. Setting this field to anything but those two values will result in the request being failed with .B -EINVAL in the CQE .I res field. .I flags is currently unused and must be set to zero. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation, which in this case will be the value of the new regular file descriptor. In case of failure, a negative value is returned. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register_files (3), .BR io_uring_unregister_files (3), .BR io_uring_prep_close_direct (3), .BR io_uring_prep_openat_direct (3) liburing-2.6/man/io_uring_prep_fsetxattr.3000077700000000000000000000000001461424365000256632io_uring_prep_setxattr.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_fsync.3000066400000000000000000000034741461424365000201130ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_fsync 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_fsync \- prepare an fsync request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_fsync(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " unsigned " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_fsync (3) function prepares an fsync request. The submission queue entry .I sqe is setup to use the file descriptor .I fd that should get synced, with the modifier flags indicated by the .I flags argument. This function prepares an fsync request. 
It can act like an .BR fsync (2) operation, which is the default behavior. If .B IORING_FSYNC_DATASYNC is set in the .I flags argument, then it behaves like .BR fdatasync (2). If no range is specified, the .I fd will be synced from 0 to end-of-file. It's possible to specify a range to sync, if one is desired. If the .I off field of the SQE is set to non-zero, then that indicates the offset to start syncing at. If .I len is set in the SQE, then that indicates the size in bytes to sync from the offset. Note that these fields are not accepted by this helper, so they have to be set manually in the SQE after calling this prep helper. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR fsync (2), .BR fdatasync (2) liburing-2.6/man/io_uring_prep_ftruncate.3000066400000000000000000000022401461424365000207520ustar00rootroot00000000000000.\" Copyright (C) 2024 Tony Solomonik .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_ftruncate 3 "January 23, 2024" "liburing-2.6" "liburing Manual" .SH NAME io_uring_prep_ftruncate \- prepare an ftruncate request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_ftruncate(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " loff_t " len ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_ftruncate (3) function prepares an ftruncate request. The submission queue entry .I sqe is setup to use the file descriptor .I fd that should get truncated to the length indicated by the .I len argument. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR ftruncate (2) liburing-2.6/man/io_uring_prep_futex_wait.3000066400000000000000000000045401461424365000211430ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_futex_wait 3 "Sep 29, 2023" "liburing-2.5" "liburing Manual" .SH NAME io_uring_prep_futex_wait \- prepare a futex wait request .SH SYNOPSIS .nf .B #include <linux/futex.h> .B #include <unistd.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_futex_wait(struct io_uring_sqe *" sqe "," .BI " uint32_t *" futex "," .BI " uint64_t " val "," .BI " uint64_t " mask "," .BI " uint32_t " futex_flags "," .BI " unsigned int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_futex_wait (3) function prepares a futex wait request. The submission queue entry .I sqe is setup for waiting on a futex at address .I futex which still has the value .I val , with .BR futex2 (2) flags of .I futex_flags and io_uring futex flags of .I flags . .I mask can be set to a specific bitset mask, which will be matched by the waking side to decide who to wake up. To always get woken, an application may use .B FUTEX_BITSET_MATCH_ANY . .I futex_flags follows the .BR futex2 (2) flags, not the .BR futex (2) v1 interface flags. 
.I flags are currently unused and hence .B 0 must be passed. This function prepares an async .BR futex (2) wait request. See that man page for details. Note that the io_uring futex wait request is similar to the .B FUTEX_WAIT_BITSET operation, as .B FUTEX_WAIT is a strict subset of that. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES Unlike the sync futex syscalls that wait on a futex, io_uring does not support passing in a timeout for the request. Instead, applications are encouraged to use a linked timeout to abort the futex request at a given time, if desired. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_prep_futex_waitv (3), .BR io_uring_prep_futex_wake (3), .BR io_uring_prep_link_timeout (3), .BR futex (2), .BR futex2 (2) liburing-2.6/man/io_uring_prep_futex_waitv.3000066400000000000000000000037541461424365000213350ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_futex_waitv 3 "Sep 29, 2023" "liburing-2.5" "liburing Manual" .SH NAME io_uring_prep_futex_waitv \- prepare a futex waitv request .SH SYNOPSIS .nf .B #include <linux/futex.h> .B #include <unistd.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_futex_waitv(struct io_uring_sqe *" sqe "," .BI " struct futex_waitv *" futexv "," .BI " uint32_t " nr_futex "," .BI " unsigned int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_futex_waitv (3) function prepares a futex wait request for multiple futexes at the same time. The submission queue entry .I sqe is setup for waiting on all futexes given by .I futexv and .I nr_futex is the number of futexes in that array. .I flags must be set to the io_uring specific futex flags. Unlike .BR io_uring_prep_futex_wait (3), the desired bitset mask and values are passed in .IR futexv . .I flags are currently unused and hence .B 0 must be passed. This function prepares an async .BR futex (2) waitv request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES Unlike the sync futex syscalls that wait on a futex, io_uring does not support passing in a timeout for the request. Instead, applications are encouraged to use a linked timeout to abort the futex request at a given time, if desired. 
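.SH EXAMPLE
A minimal sketch (the helper name is illustrative) that waits on two 32-bit futex words at once, assuming both currently contain the value 0. The
.I waiters
array must remain valid until the request completes, which holds here because the function reaps the CQE before returning.
.B FUTEX2_SIZE_U32
is the
.BR futex2 (2)
size flag for 32-bit futex words, from \fI<linux/futex.h>\fP:
.PP
.EX
void wait_two_futexes(struct io_uring *ring, uint32_t *f1, uint32_t *f2)
{
	struct futex_waitv waiters[2] = {
		{ .val = 0, .uaddr = (uintptr_t) f1, .flags = FUTEX2_SIZE_U32 },
		{ .val = 0, .uaddr = (uintptr_t) f2, .flags = FUTEX2_SIZE_U32 },
	};
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_futex_waitv(sqe, waiters, 2, 0);
	io_uring_submit(ring);
	io_uring_wait_cqe(ring, &cqe);
	/* on success, cqe->res is the index of the woken futex */
	io_uring_cqe_seen(ring, cqe);
}
.EE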
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_prep_futex_wait (3), .BR io_uring_prep_futex_wake (3), .BR io_uring_prep_link_timeout (3), .BR futex (2), .BR futex2 (2) liburing-2.6/man/io_uring_prep_futex_wake.3000066400000000000000000000040671461424365000211300ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_futex_wake 3 "Sep 29, 2023" "liburing-2.5" "liburing Manual" .SH NAME io_uring_prep_futex_wake \- prepare a futex wake request .SH SYNOPSIS .nf .B #include <linux/futex.h> .B #include <unistd.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_futex_wake(struct io_uring_sqe *" sqe "," .BI "                               uint32_t *" futex "," .BI "                               uint64_t " val "," .BI "                               uint64_t " mask "," .BI "                               uint32_t " futex_flags "," .BI "                               unsigned int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_futex_wake (3) function prepares a futex wake request. The submission queue entry .I sqe is setup for waking any waiters on the futex indicated by .I futex , waking at most .I val waiters. .I futex_flags indicates the .BR futex2 (2) modifier flags, and io_uring futex flags of .I flags . If a given bitset for who to wake is desired, then that must be set in .I mask . Use .B FUTEX_BITSET_MATCH_ANY to match any waiter on the given futex. .I flags are currently unused and hence .B 0 must be passed. This function prepares an async .BR futex (2) wake request. See that man page for details. Note that the io_uring futex wake request is similar to the .B FUTEX_WAKE_BITSET operation, as .B FUTEX_WAKE is a strict subset of that. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. On success, the value will be the number of waiters that were woken up. See the related man page for details on possible values for errors. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_prep_futex_wait (3), .BR io_uring_prep_futex_waitv (3), .BR futex (2), .BR futex2 (2) liburing-2.6/man/io_uring_prep_getxattr.3000066400000000000000000000031121461424365000206200ustar00rootroot00000000000000.\" Copyright (C) 2023 Rutvik Patel .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_getxattr 3 "January 23, 2023" "liburing-2.4" "liburing Manual" .SH NAME io_uring_prep_getxattr, io_uring_prep_fgetxattr \- prepare a request to get an extended attribute value .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_getxattr(struct io_uring_sqe *" sqe "," .BI "                             const char *" name "," .BI "                             char *" value "," .BI "                             const char *" path "," .BI "                             unsigned int " len ");" .PP .BI "void io_uring_prep_fgetxattr(struct io_uring_sqe *" sqe "," .BI "                              int " fd "," .BI "                              const char *" name "," .BI "                              char *" value "," .BI "                              unsigned int " len ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_getxattr (3) function prepares a request to get an extended attribute value. The submission queue entry .I sqe is setup to get the .I value of the extended attribute identified by .I name and associated with the given .I path in the filesystem. The .I len argument specifies the size (in bytes) of .IR value . .BR io_uring_prep_fgetxattr (3) is identical to .BR io_uring_prep_getxattr (3), only the open file referred to by .I fd is interrogated in place of .IR path .
This function prepares an async .BR getxattr (2) request. See that man page for details. .SH RETURN VALUE None .SH SEE ALSO .BR io_uring_get_sqe (3), .BR getxattr (2) liburing-2.6/man/io_uring_prep_link.3000077700000000000000000000000001461424365000242002io_uring_prep_linkat.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_link_timeout.3000066400000000000000000000044651461424365000214750ustar00rootroot00000000000000.\" Copyright (C) 2023 Rutvik Patel .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_link_timeout 3 "January 23, 2023" "liburing-2.4" "liburing Manual" .SH NAME io_uring_prep_link_timeout \- a timeout request for linked sqes .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_link_timeout(struct io_uring_sqe *" sqe "," .BI "                                 struct __kernel_timespec *" ts "," .BI "                                 unsigned " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_link_timeout (3) function prepares a timeout request for linked sqes. The submission queue entry .I sqe is setup with a timeout specified by .IR ts . The .I flags argument holds modifier flags for the timeout behaviour of the request. The .I ts argument must be filled in with the appropriate information for the timeout. It looks as follows: .PP .in +4n .EX struct __kernel_timespec { __kernel_time64_t tv_sec; long long tv_nsec; }; .EE .in .PP The .I flags argument may contain: .TP .B IORING_TIMEOUT_ABS The value specified in .I ts is an absolute value rather than a relative one. .TP .B IORING_TIMEOUT_BOOTTIME The boottime clock source should be used. .TP .B IORING_TIMEOUT_REALTIME The realtime clock source should be used. .TP .B IORING_TIMEOUT_ETIME_SUCCESS Consider an expired timeout a success in terms of the posted completion. .PP It is invalid to create a chain (linked sqes) consisting only of a link timeout request. If all the requests in the chain are completed before timeout, then the link timeout request gets cancelled. Upon timeout, all the uncompleted requests in the chain get cancelled. .SH RETURN VALUE None .SH ERRORS .PP These are the errors that are reported in the CQE .I res field. On success, .B 0 is returned. .TP .B -ETIME The specified timeout occurred and triggered the completion event. .TP .B -EINVAL One of the fields set in the SQE was invalid. For example, two clock sources were given, or the specified timeout seconds or nanoseconds were < 0. .TP .B -EFAULT io_uring was unable to access the data specified by .IR ts . .TP .B -ECANCELED The timeout was canceled because all submitted requests were completed successfully or one of the requests resulted in failure. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_prep_timeout (3) liburing-2.6/man/io_uring_prep_linkat.3000066400000000000000000000051031461424365000202420ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_linkat 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_linkat \- prepare a linkat request .SH SYNOPSIS .nf .B #include <fcntl.h> .B #include <unistd.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_linkat(struct io_uring_sqe *" sqe "," .BI "                           int " olddirfd "," .BI "                           const char *" oldpath "," .BI "                           int " newdirfd "," .BI "                           const char *" newpath "," .BI "                           int " flags ");" .PP .BI "void io_uring_prep_link(struct io_uring_sqe *" sqe "," .BI "                         const char *" oldpath "," .BI "                         const char *" newpath "," .BI "                         int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_linkat (3) function prepares a linkat request.
The submission queue entry .I sqe is setup to use the old directory file descriptor pointed to by .I olddirfd and the old path pointed to by .I oldpath with the new directory file descriptor pointed to by .I newdirfd and the new path pointed to by .I newpath and using the specified flags in .IR flags . The .BR io_uring_prep_link (3) function prepares a link request. The submission queue entry .I sqe is setup to use the old path pointed to by .I oldpath and the new path pointed to by .IR newpath , both relative to the current working directory and using the specified flags in .IR flags . These functions prepare an async .BR linkat (2) or .BR link (2) request. See those man pages for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR linkat (2), .BR link (2) liburing-2.6/man/io_uring_prep_madvise.3000066400000000000000000000025371461424365000204160ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_madvise 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_madvise \- prepare a madvise request .SH SYNOPSIS .nf .B #include <sys/mman.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_madvise(struct io_uring_sqe *" sqe "," .BI "                            void *" addr "," .BI "                            off_t " len "," .BI "                            int " advice ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_madvise (3) function prepares a madvise request. The submission queue entry .I sqe is setup to start a madvise operation at the virtual address of .I addr and of .I len length in bytes, applying the advice given in .IR advice . This function prepares an async .BR madvise (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field.
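.SH EXAMPLE
.PP
A minimal sketch of preparing and reaping a single madvise request; the mapping size and advice value are arbitrary choices for illustration, and error handling is abbreviated.
.PP
.in +4n
.EX
#include <sys/mman.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        size_t len = 64 * 1024;
        void *addr;

        if (io_uring_queue_init(4, &ring, 0))
                return 1;
        addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (addr == MAP_FAILED)
                return 1;

        sqe = io_uring_get_sqe(&ring);
        /* hint that this range will be needed soon */
        io_uring_prep_madvise(sqe, addr, len, MADV_WILLNEED);
        io_uring_submit(&ring);

        /* cqe->res is 0 on success, or a negated errno */
        if (!io_uring_wait_cqe(&ring, &cqe))
                io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        return 0;
}
.EE
.in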
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2), .BR madvise (2) liburing-2.6/man/io_uring_prep_mkdir.3000077700000000000000000000000001461424365000245222io_uring_prep_mkdirat.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_mkdirat.3000066400000000000000000000044331461424365000204200ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_mkdirat 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_mkdirat \- prepare an mkdirat request .SH SYNOPSIS .nf .B #include .B #include .B #include .PP .BI "void io_uring_prep_mkdirat(struct io_uring_sqe *" sqe "," .BI " int " dirfd "," .BI " const char *" path "," .BI " mode_t " mode ");" .PP .BI "void io_uring_prep_mkdir(struct io_uring_sqe *" sqe "," .BI " const char *" path "," .BI " mode_t " mode ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_mkdirat (3) function prepares a mkdirat request. The submission queue entry .I sqe is setup to use the directory file descriptor pointed to by .I dirfd to start a mkdirat operation on the path identified by .I path with the mode given in .IR mode . The .BR io_uring_prep_mkdir (3) function prepares a mkdir request. The submission queue entry .I sqe is setup to use the current working directory to start a mkdir operation on the path identified by .I path with the mode given in .IR mode . These functions prepare an async .BR mkdir (2) or .BR mkdirat (2) request. See those man pages for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR mkdirat (2), .BR mkdir (2) liburing-2.6/man/io_uring_prep_msg_ring.3000066400000000000000000000051511461424365000205700ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_msg_ring 3 "March 10, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_msg_ring \- send a message to another ring .SH SYNOPSIS .nf .B #include .PP .BI "void io_uring_prep_msg_ring(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " unsigned int " len "," .BI " __u64 " data "," .BI " unsigned int " flags ");" .PP .BI "void io_uring_prep_msg_ring_cqe_flags(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " unsigned int " len "," .BI " __u64 " data "," .BI " unsigned int " flags "," .BI " unsigned int " cqe_flags ");" .fi .SH DESCRIPTION .PP .BR io_uring_prep_msg_ring (3) prepares to send a CQE to an io_uring file descriptor. 
The submission queue entry .I sqe is setup to use the file descriptor .IR fd , which must identify an io_uring context, to post a CQE on that ring where the target CQE .B res field will contain the content of .I len and the .B user_data of .I data with the request modifier flags set by .IR flags . Currently there are no valid flag modifiers, so this field must contain .BR 0 . The targeted ring may be any ring that the user has access to, even the ring itself. This request can be used for simple message passing to another ring, allowing 32+64 bits of data to be transferred through the .I len and .I data fields. The use case may be anything from simply waking up someone waiting on the targeted ring, to passing messages between the two rings. .BR io_uring_prep_msg_ring_cqe_flags (3) is similar to .BR io_uring_prep_msg_ring (3) , but has an additional .I cqe_flags parameter, which is used to set the .I flags field on the CQE side. That way, you can set the CQE flags field .I cqe->flags when sending a message. Be aware that io_uring could potentially set additional bits into this field. .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. .TP .B -ENOMEM The kernel was unable to allocate memory for the request. .TP .B -EINVAL One of the fields set in the SQE was invalid. .TP .B -EBADFD The descriptor passed in .I fd does not refer to an io_uring file descriptor, or the ring is in a disabled state. .TP .B -EOVERFLOW The kernel was unable to fill a CQE on the target ring. This can happen if the target CQ ring is in an overflow state and the kernel wasn't able to allocate memory for a new CQE entry. liburing-2.6/man/io_uring_prep_msg_ring_cqe_flags.3000077700000000000000000000000001461424365000273772io_uring_prep_msg_ring.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_msg_ring_fd.3000066400000000000000000000045771461424365000212500ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_msg_ring 3 "Mar 16, 2023" "liburing-2.4" "liburing Manual" .SH NAME io_uring_prep_msg_ring_fd \- send a direct descriptor to another ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_msg_ring_fd(struct io_uring_sqe *" sqe "," .BI "                                int " fd "," .BI "                                int " source_fd "," .BI "                                int " target_fd "," .BI "                                __u64 " data "," .BI "                                unsigned int " flags ");" .PP .BI "void io_uring_prep_msg_ring_fd_alloc(struct io_uring_sqe *" sqe "," .BI "                                      int " fd "," .BI "                                      int " source_fd "," .BI "                                      __u64 " data "," .BI "                                      unsigned int " flags ");" .fi .SH DESCRIPTION .PP .BR io_uring_prep_msg_ring_fd (3) prepares an SQE to send a direct file descriptor to another ring. The submission queue entry .I sqe is setup to use the file descriptor .IR fd , which must identify a target io_uring context, to send the locally registered file descriptor with value .I source_fd to the destination ring into index .I target_fd and passing .I data as the user data in the target CQE with the request modifier flags set by .IR flags . Currently there are no valid flag modifiers, so this field must contain .BR 0 . .BR io_uring_prep_msg_ring_fd_alloc (3) is similar to .BR io_uring_prep_msg_ring_fd (3) , but doesn't specify a target index for the direct descriptor. Instead, this index is allocated in the target ring and returned in the CQE .IR res field. .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. .TP .B -ENOMEM The kernel was unable to allocate memory for the request.
.TP .B -EINVAL One of the fields set in the SQE was invalid. .TP .B -EINVAL Target ring is identical to the source ring. .TP .B -EBADFD The descriptor passed in .I fd does not refer to an io_uring file descriptor, or the ring is in a disabled state. .TP .B -EOVERFLOW The kernel was unable to fill a CQE on the target ring. This can happen if the target CQ ring is in an overflow state and the kernel wasn't able to allocate memory for a new CQE entry. .TP .B -ENFILE The direct descriptor table in the target ring was full; no new descriptors could be successfully allocated. liburing-2.6/man/io_uring_prep_msg_ring_fd_alloc.3000077700000000000000000000000001461424365000276672io_uring_prep_msg_ring_fd.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_multishot_accept.3000077700000000000000000000000001461424365000265672io_uring_prep_accept.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_multishot_accept_direct.3000077700000000000000000000000001461424365000301212io_uring_prep_accept.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_nop.3000066400000000000000000000011341461424365000175540ustar00rootroot00000000000000.\" Copyright (C) 2022 Samuel Williams .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_nop 3 "October 20, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_nop \- prepare a nop request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_nop(struct io_uring_sqe *" sqe ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_nop (3) function prepares a nop (no operation) request. The submission queue entry .I sqe does not require any additional setup. .SH RETURN VALUE None .SH ERRORS None .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_prep_openat.3000066400000000000000000000075441461424365000202550ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_openat 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_openat \- prepare an openat request .SH SYNOPSIS .nf .B #include <sys/types.h> .B #include <sys/stat.h> .B #include <fcntl.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_openat(struct io_uring_sqe *" sqe "," .BI "                           int " dfd "," .BI "                           const char *" path "," .BI "                           int " flags "," .BI "                           mode_t " mode ");" .PP .BI "void io_uring_prep_openat_direct(struct io_uring_sqe *" sqe "," .BI "                                  int " dfd "," .BI "                                  const char *" path "," .BI "                                  int " flags "," .BI "                                  mode_t " mode "," .BI "                                  unsigned " file_index ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_openat (3) function prepares an openat request. The submission queue entry .I sqe is setup to use the directory file descriptor .I dfd to start opening a file described by .I path and using the open flags in .I flags and using the file mode bits specified in .IR mode . For a direct descriptor open request, the offset is specified by the .I file_index argument. Direct descriptors are io_uring private file descriptors. They avoid some of the overhead associated with thread shared file tables, and can be used in any io_uring request that takes a file descriptor. To do so, .B IOSQE_FIXED_FILE must be set in the SQE .I flags member, and the SQE .I fd field should use the direct descriptor value rather than the regular file descriptor. Direct descriptors are managed like registered files. If the direct variant is used, the application must first have registered a file table using .BR io_uring_register_files (3) of the appropriate size.
Once registered, a direct open request may use any entry in that table, as long as it is within the size of the registered table. If a specified entry already contains a file, the file will first be removed from the table and closed. It's consistent with the behavior of updating an existing file with .BR io_uring_register_files_update (3). Note that old kernels don't check the SQE .I file_index field, which is not a problem for liburing helpers, but users of the raw io_uring interface need to zero SQEs to avoid unexpected behavior. If .B IORING_FILE_INDEX_ALLOC is used as the .I file_index for a direct open, then io_uring will allocate a free direct descriptor in the existing table. The allocated descriptor is returned in the CQE .I res field just like it would be for a non-direct open request. If no more entries are available in the direct descriptor table, .B -ENFILE is returned instead. These functions prepare an async .BR openat (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2), .BR openat (2) liburing-2.6/man/io_uring_prep_openat2.3000066400000000000000000000076661461424365000203450ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_openat2 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_openat2 \- prepare an openat2 request .SH SYNOPSIS .nf .B #include <sys/types.h> .B #include <sys/stat.h> .B #include <fcntl.h> .B #include <linux/openat2.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_openat2(struct io_uring_sqe *" sqe "," .BI "                            int " dfd "," .BI "                            const char *" path "," .BI "                            int " flags "," .BI "                            struct open_how *" how ");" .PP .BI "void io_uring_prep_openat2_direct(struct io_uring_sqe *" sqe "," .BI "                                   int " dfd "," .BI "                                   const char *" path "," .BI "                                   int " flags "," .BI "                                   struct open_how *" how "," .BI "                                   unsigned " file_index ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_openat2 (3) function prepares an openat2 request. The submission queue entry .I sqe is setup to use the directory file descriptor .I dfd to start opening a file described by .I path and using the open flags in .I flags and using the instructions on how to open the file given in .IR how . For a direct descriptor open request, the offset is specified by the .I file_index argument. Direct descriptors are io_uring private file descriptors. They avoid some of the overhead associated with thread shared file tables, and can be used in any io_uring request that takes a file descriptor.
To do so, .B IOSQE_FIXED_FILE must be set in the SQE .I flags member, and the SQE .I fd field should use the direct descriptor value rather than the regular file descriptor. Direct descriptors are managed like registered files. If the direct variant is used, the application must first have registered a file table using .BR io_uring_register_files (3) of the appropriate size. Once registered, a direct open request may use any entry in that table, as long as it is within the size of the registered table. If a specified entry already contains a file, the file will first be removed from the table and closed. It's consistent with the behavior of updating an existing file with .BR io_uring_register_files_update (3). Note that old kernels don't check the SQE .I file_index field, which is not a problem for liburing helpers, but users of the raw io_uring interface need to zero SQEs to avoid unexpected behavior. If .B IORING_FILE_INDEX_ALLOC is used as the .I file_index for a direct open, then io_uring will allocate a free direct descriptor in the existing table. The allocated descriptor is returned in the CQE .I res field just like it would be for a non-direct open request. If no more entries are available in the direct descriptor table, .B -ENFILE is returned instead. These functions prepare an async .BR openat2 (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2), .BR openat2 (2) liburing-2.6/man/io_uring_prep_openat2_direct.3000077700000000000000000000000001461424365000262332io_uring_prep_openat2.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_openat_direct.3000077700000000000000000000000001461424365000260672io_uring_prep_openat.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_poll_add.3000066400000000000000000000043271461424365000205450ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_poll_add 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_poll_add \- prepare a poll request .SH SYNOPSIS .nf .B #include <poll.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_poll_add(struct io_uring_sqe *" sqe "," .BI "                             int " fd "," .BI "                             unsigned " poll_mask ");" .PP .BI "void io_uring_prep_poll_multishot(struct io_uring_sqe *" sqe "," .BI "                                   int " fd "," .BI "                                   unsigned " poll_mask ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_poll_add (3) function prepares a poll request. The submission queue entry .I sqe is setup to use the file descriptor .I fd that should get polled, with the events desired specified in the .I poll_mask argument.
The default behavior is a single-shot poll request. When the specified event has triggered, a completion CQE is posted and no more events will be generated by the poll request. .BR io_uring_prep_poll_multishot (3) behaves identically in terms of events, but it persists across notifications and will repeatedly post notifications for the same registration. A CQE posted from a multishot poll request will have .B IORING_CQE_F_MORE set in the CQE .I flags member, indicating that the application should expect more completions from this request. If the multishot poll request gets terminated or experiences an error, this flag will not be set in the CQE. If this happens, the application should not expect further CQEs from the original request and must reissue a new one if it still wishes to get notifications on this file descriptor. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation, which is a bitmask of the events notified. See the .BR poll (2) man page for details. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR poll (2), .BR epoll_ctl (3) liburing-2.6/man/io_uring_prep_poll_multishot.3000077700000000000000000000000001461424365000266152io_uring_prep_poll_add.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_poll_remove.3000066400000000000000000000027361461424365000213140ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_poll_remove 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_poll_remove \- prepare a poll deletion request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_poll_remove(struct io_uring_sqe *" sqe "," .BI "                                __u64 " user_data ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_poll_remove (3) function prepares a poll removal request. The submission queue entry .I sqe is setup to remove a poll request identified by .IR user_data . It works like .BR io_uring_prep_cancel (3), except it only looks for poll requests. Apart from that, behavior is identical. See that man page for specific details. .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. On success, .B 0 is returned. .TP .B -ENOENT The request identified by .I user_data could not be located. This could be because it completed before the cancelation request was issued, or if an invalid identifier is used. .TP .B -EINVAL One of the fields set in the SQE was invalid. .TP .B -EALREADY The execution state of the request has progressed far enough that cancelation is no longer possible. This should normally mean that it will complete shortly, either successfully, or interrupted due to the cancelation.
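.SH EXAMPLE
.PP
An illustrative sketch: arm a poll request tagged with a known .I user_data value on a pipe, then remove it again by that tag. The tag values are arbitrary, and error handling is abbreviated.
.PP
.in +4n
.EX
#include <poll.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        int fds[2];

        if (io_uring_queue_init(8, &ring, 0) || pipe(fds))
                return 1;

        /* arm a poll on the pipe read side, tagged 0x1234 */
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_poll_add(sqe, fds[0], POLLIN);
        sqe->user_data = 0x1234;

        /* remove it again, matching on the same user_data */
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_poll_remove(sqe, 0x1234);
        sqe->user_data = 0x5678;
        io_uring_submit(&ring);

        /* expect two CQEs: the canceled poll completes with
           -ECANCELED, the remove request itself with 0 on success */
        for (int i = 0; i < 2; i++) {
                if (io_uring_wait_cqe(&ring, &cqe))
                        break;
                io_uring_cqe_seen(&ring, cqe);
        }
        io_uring_queue_exit(&ring);
        return 0;
}
.EE
.in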
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_prep_cancel (3) liburing-2.6/man/io_uring_prep_poll_update.3000066400000000000000000000060531461424365000212750ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_poll_update 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_poll_update \- update an existing poll request .SH SYNOPSIS .nf .B #include <poll.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_poll_update(struct io_uring_sqe *" sqe "," .BI "                                __u64 " old_user_data "," .BI "                                __u64 " new_user_data "," .BI "                                unsigned " poll_mask "," .BI "                                unsigned " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_poll_update (3) function prepares a poll update request. The submission queue entry .I sqe is setup to update a poll request identified by .IR old_user_data , replacing it with the .I new_user_data information. The .I poll_mask argument contains the new mask to use for the poll request, and the .I flags argument contains modifier flags telling io_uring what fields to update. The .I flags argument is a bitmask and may contain an OR'ed mask of: .TP .B IORING_POLL_UPDATE_EVENTS If set, the poll update request will replace the existing events being waited for with the ones specified in the .I poll_mask argument to the function. Note that only the lower 16 bits of events can be updated. This includes things like .B EPOLLIN and .B EPOLLOUT . Higher order masks/settings are included as internal state, and cannot be modified. That includes settings like .B EPOLLONESHOT , .B EPOLLEXCLUSIVE , and .B EPOLLET . If an application wishes to modify these, it must cancel/remove the existing poll request and arm a new one. .TP .B IORING_POLL_UPDATE_USER_DATA If set, the poll update request will update the existing user_data of the request with the value passed in as the .I new_user_data argument. .TP .B IORING_POLL_ADD_MULTI If set, this will change the poll request from a singleshot to a multishot request. This must be used along with .B IORING_POLL_UPDATE_EVENTS as the event field must be updated to enable multishot. .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. On success, .B 0 is returned. .TP .B -ENOENT The request identified by .I user_data could not be located. This could be because it completed before the cancelation request was issued, or if an invalid identifier is used. .TP .B -EINVAL One of the fields set in the SQE was invalid. .TP .B -EALREADY The execution state of the request has progressed far enough that cancelation is no longer possible. This should normally mean that it will complete shortly, either successfully, or interrupted due to the cancelation. .TP .B -ECANCELED .B IORING_POLL_UPDATE_EVENTS was set and an error occurred re-arming the poll request with the new mask. The original poll request is terminated if this happens, and that termination CQE will contain the reason for the error re-arming.
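.SH EXAMPLE
.PP
A sketch of retargeting an armed poll request with a new event mask and new .IR user_data ; the tag values and the pipe are arbitrary illustration choices, and error handling is abbreviated.
.PP
.in +4n
.EX
#include <poll.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        int fds[2];

        if (io_uring_queue_init(8, &ring, 0) || pipe(fds))
                return 1;

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_poll_add(sqe, fds[0], POLLIN);
        sqe->user_data = 0x1234;

        /* retarget the armed poll: new events and new user_data */
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_poll_update(sqe, 0x1234, 0x4321,
                                  POLLIN | POLLOUT,
                                  IORING_POLL_UPDATE_EVENTS |
                                  IORING_POLL_UPDATE_USER_DATA);
        sqe->user_data = 0x5678;
        io_uring_submit(&ring);

        /* the update CQE (user_data 0x5678) carries 0 on success;
           the poll request remains armed under 0x4321 */
        if (!io_uring_wait_cqe(&ring, &cqe))
                io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        return 0;
}
.EE
.in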
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_prep_poll_add (3), .BR io_uring_prep_poll_multishot (3) liburing-2.6/man/io_uring_prep_provide_buffers.3000066400000000000000000000076601461424365000221560ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_provide_buffers 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_provide_buffers \- prepare a provide buffers request .SH SYNOPSIS .nf .B #include .PP .BI "void io_uring_prep_provide_buffers(struct io_uring_sqe *" sqe "," .BI " void *" addr "," .BI " int " len "," .BI " int " nr "," .BI " int " bgid "," .BI " int " bid ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_provide_buffers (3) function prepares a request for providing the kernel with buffers. The submission queue entry .I sqe is setup to consume .I nr number of .I len sized buffers starting at .I addr and identified by the buffer group ID of .I bgid and numbered sequentially starting at .IR bid . This function sets up a request to provide buffers to the io_uring context that can be used by read or receive operations. This is done by filling in the SQE .I buf_group field and setting .B IOSQE_BUFFER_SELECT in the SQE .I flags member. If buffer selection is used for a request, no buffer should be provided in the address field. Instead, the group ID is set to match one that was previously provided to the kernel. The kernel will then select a buffer from this group for the IO operation. On successful completion of the IO request, the CQE .I flags field will have .B IORING_CQE_F_BUFFER set and the selected buffer ID will be indicated by the upper 16-bits of the .I flags field. Different buffer group IDs can be used by the application to have different sizes or types of buffers available. Once a buffer has been consumed for an operation, it is no longer known to io_uring. It must be re-provided if so desired or freed by the application if no longer needed. The buffer IDs are internally tracked from .I bid and sequentially ascending from that value. If .B 16 buffers are provided and start with an initial .I bid of 0, then the buffer IDs will range from .BR 0..15 . The application must be aware of this to make sense of the buffer ID passed back in the CQE. Buffer IDs always range from .B 0 to .B 65535 , as there are only 16-bits available in the CQE to pass them back. This range is independent of how the buffer group initially got created. Attempting to add buffer IDs larger than that, or buffer IDs that will wrap when cast to a 16-bit value, will cause the request to fail with .B -E2BIG or .B -EINVAL . Not all requests support buffer selection, as it only really makes sense for requests that receive data from the kernel rather than write or provide data. Currently, this mode of operation is supported for any file read or socket receive request. Attempting to use .B IOSQE_BUFFER_SELECT with a command that doesn't support it will result in a CQE .I res error of .BR -EINVAL . Buffer selection will work with operations that take a .B struct iovec as its data destination, but only if 1 iovec is provided. . .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. On success, .I res will contain .B 0 or the number of successfully provided buffers. .TP .B -ENOMEM The kernel was unable to allocate memory for the request. .TP .B -EINVAL One of the fields set in the SQE was invalid. 
.TP .B -E2BIG The number of buffers provided was too big, or the .I bid was too big. A max value of .B USHRT_MAX buffers can be specified. .TP .B -EFAULT Some of the user memory given was invalid for the application. .TP .B -EOVERFLOW The product of .I len and .I nr exceed the valid amount or overflowed, or the sum of .I addr and the length of buffers overflowed. .TP .B -EBUSY Attempt to update a slot that is already used. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2), .BR io_uring_prep_remove_buffers (3) liburing-2.6/man/io_uring_prep_read.3000066400000000000000000000035751461424365000177060ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_read 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_prep_read \- prepare I/O read request .SH SYNOPSIS .nf .B #include .PP .BI "void io_uring_prep_read(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " void *" buf "," .BI " unsigned " nbytes "," .BI " __u64 " offset ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_read (3) prepares an IO read request. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start reading .I nbytes into the buffer .I buf at the specified .IR offset . On files that support seeking, if the offset is set to .BR -1 , the read operation commences at the file offset, and the file offset is incremented by the number of bytes read. See .BR read (2) for more details. Note that for an async API, reading and updating the current file offset may result in unpredictable behavior, unless access to the file is serialized. It is not encouraged to use this feature, if it's possible to provide the desired IO offset from the application or library. On files that are not capable of seeking, the offset must be 0 or -1. After the read has been prepared it can be submitted with one of the submit functions. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_prep_readv (3), .BR io_uring_prep_readv2 (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_prep_read_fixed.3000066400000000000000000000035021461424365000210530ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_read 3 "February 13, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_prep_read_fixed \- prepare I/O read request with registered buffer .SH SYNOPSIS .nf .B #include .PP .BI "void io_uring_prep_read_fixed(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " void *" buf "," .BI " unsigned " nbytes "," .BI " __u64 " offset "," .BI " int " buf_index ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_read_fixed (3) prepares an IO read request with a previously registered IO buffer. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start reading .I nbytes into the buffer .I buf at the specified .IR offset , and with the buffer matching the registered index of .IR buf_index . 
This works just like .BR io_uring_prep_read (3) except it requires the use of buffers that have been registered with .BR io_uring_register_buffers (3). The .I buf and .I nbytes arguments must fall within a region specified by .I buf_index in the previously registered buffer. The buffer need not be aligned with the start of the registered buffer. After the read has been prepared it can be submitted with one of the submit functions. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_prep_read (3), .BR io_uring_register_buffers (3) liburing-2.6/man/io_uring_prep_read_multishot.3000066400000000000000000000056221461424365000220110ustar00rootroot00000000000000.\" Copyright (C) 2023 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_read_multishot 3 "September 12, 2023" "liburing-2.5" "liburing Manual" .SH NAME io_uring_prep_read_multishot \- prepare I/O read multishot request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_read_multishot(struct io_uring_sqe *" sqe "," .BI "                                   int " fd "," .BI "                                   unsigned " nbytes "," .BI "                                   __u64 " offset "," .BI "                                   int " buf_group ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_read_multishot (3) helper prepares an IO read multishot request. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start reading into a buffer from the provided buffer group with ID .I buf_group at the specified .IR offset . .I nbytes must be set to zero, as the size read will be given by the size of the buffers in the indicated buffer group ID. On files that are not capable of seeking, the offset must be 0 or -1. If .I nbytes exceeds the size of the buffers in the specified buffer group, or if .I nbytes is .B 0 , then the size of the buffer in that group will be used for the transfer. A multishot read request will repeatedly trigger a completion event whenever data is available to read from the file. Because of that, this type of request can only be used with a file type that is pollable. Examples of that include pipes, tun devices, etc. If used with a regular file, or a wrong file type in general, the request will fail with .B -EBADFD in the CQE .I res field. Since multishot requests repeatedly trigger completion events as data arrives, it must be used with provided buffers. With provided buffers, the application provides buffers to io_uring upfront, and then the kernel picks a buffer from the specified group in .I buf_group when the request is ready to transfer data. A multishot request will persist as long as no errors are encountered while handling the request. For each CQE posted on behalf of this request, the CQE .I flags will have .B IORING_CQE_F_MORE set if the application should expect more completions from this request. If this flag isn't set, then that signifies termination of the multishot read request. After the read has been prepared it can be submitted with one of the submit functions. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values.
Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_prep_read (3), .BR io_uring_buf_ring_init (3) .BR io_uring_buf_ring_add (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_prep_readv.3000066400000000000000000000052051461424365000200640ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_readv 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_prep_readv \- prepare vector I/O read request .SH SYNOPSIS .nf .B #include .B #include .PP .BI "void io_uring_prep_readv(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " const struct iovec *" iovecs "," .BI " unsigned " nr_vecs "," .BI " __u64 " offset ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_readv (3) prepares a vectored IO read request. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start reading .I nr_vecs into the .I iovecs array at the specified .IR offset . On files that support seeking, if the offset is set to .BR -1 , the read operation commences at the file offset, and the file offset is incremented by the number of bytes read. See .BR read (2) for more details. Note that for an async API, reading and updating the current file offset may result in unpredictable behavior, unless access to the file is serialized. It is not encouraged to use this feature, if it's possible to provide the desired IO offset from the application or library. On files that are not capable of seeking, the offset must be 0 or -1. After the read has been prepared it can be submitted with one of the submit functions. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES Unless an application explicitly needs to pass in more than one iovec, it is more efficient to use .BR io_uring_prep_read (3) rather than this function, as no state has to be maintained for a non-vectored IO request. As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). 
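.SH EXAMPLE
.PP
A brief sketch of a two-segment vectored read from the start of a file; the path used here is just a placeholder for any readable file, and error handling is abbreviated.
.PP
.in +4n
.EX
#include <sys/uio.h>
#include <fcntl.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        char a[128], b[128];
        struct iovec iov[2] = {
                { .iov_base = a, .iov_len = sizeof(a) },
                { .iov_base = b, .iov_len = sizeof(b) },
        };
        int fd;

        if (io_uring_queue_init(4, &ring, 0))
                return 1;
        fd = open("/etc/hostname", O_RDONLY);  /* any readable file */
        if (fd < 0)
                return 1;

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_readv(sqe, fd, iov, 2, 0);
        io_uring_submit(&ring);

        /* cqe->res is the total bytes read, or a negated errno */
        if (!io_uring_wait_cqe(&ring, &cqe))
                io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        return 0;
}
.EE
.in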
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_prep_read (3), .BR io_uring_prep_readv2 (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_prep_readv2.3000066400000000000000000000060301461424365000201430ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_readv2 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_prep_readv2 \- prepare vector I/O read request with flags .SH SYNOPSIS .nf .B #include .B #include .PP .BI "void io_uring_prep_readv2(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " const struct iovec *" iovecs "," .BI " unsigned " nr_vecs "," .BI " __u64 " offset "," .BI " int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_readv2 (3) prepares a vectored IO read request. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start reading .I nr_vecs into the .I iovecs array at the specified .IR offset . The behavior of the function can be controlled with the .I flags parameter. Supported values for .I flags are: .TP .B RWF_HIPRI High priority request, poll if possible .TP .B RWF_DSYNC per-IO O_DSYNC .TP .B RWF_SYNC per-IO O_SYNC .TP .B RWF_NOWAIT per-IO, return .B -EAGAIN if operation would block .TP .B RWF_APPEND per-IO O_APPEND .P On files that support seeking, if the offset is set to .BR -1 , the read operation commences at the file offset, and the file offset is incremented by the number of bytes read. See .BR read (2) for more details. Note that for an async API, reading and updating the current file offset may result in unpredictable behavior, unless access to the file is serialized. It is not encouraged to use this feature, if it's possible to provide the desired IO offset from the application or library. On files that are not capable of seeking, the offset must be 0 or -1. After the read has been prepared, it can be submitted with one of the submit functions. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES Unless an application explicitly needs to pass in more than one iovec, it is more efficient to use .BR io_uring_prep_read (3) rather than this function, as no state has to be maintained for a non-vectored IO request. As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). 
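.SH EXAMPLE
.PP
A sketch of using the .I flags argument: the same kind of vectored read as in .BR io_uring_prep_readv (3), but with .B RWF_NOWAIT so the request completes with .B -EAGAIN rather than waiting if the data is not already available. It assumes .B _GNU_SOURCE for the RWF_* definitions; the file path is a placeholder and error handling is abbreviated.
.PP
.in +4n
.EX
#define _GNU_SOURCE
#include <sys/uio.h>
#include <fcntl.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        char buf[256];
        struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
        int fd;

        if (io_uring_queue_init(4, &ring, 0))
                return 1;
        fd = open("/etc/hostname", O_RDONLY);  /* any readable file */
        if (fd < 0)
                return 1;

        sqe = io_uring_get_sqe(&ring);
        /* don't wait for data that isn't in the page cache yet */
        io_uring_prep_readv2(sqe, fd, &iov, 1, 0, RWF_NOWAIT);
        io_uring_submit(&ring);

        /* cqe->res: bytes read, or -EAGAIN if it would have blocked */
        if (!io_uring_wait_cqe(&ring, &cqe))
                io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        return 0;
}
.EE
.in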
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_prep_read (3), .BR io_uring_prep_readv (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_prep_recv.3000066400000000000000000000062621461424365000177260ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_recv 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_recv \- prepare a recv request .SH SYNOPSIS .nf .B #include .PP .BI "void io_uring_prep_recv(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " void *" buf "," .BI " size_t " len "," .BI " int " flags ");" .PP .BI "void io_uring_prep_recv_multishot(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " void *" buf "," .BI " size_t " len "," .BI " int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_recv (3) function prepares a recv request. The submission queue entry .I sqe is setup to use the file descriptor .I sockfd to start receiving the data into the destination buffer .I buf of size .I len and with modifier flags .IR flags . This function prepares an async .BR recv (2) request. See that man page for details on the arguments specified to this prep helper. The multishot version allows the application to issue a single receive request, which repeatedly posts a CQE when data is available. It requires length to be 0, the .B IOSQE_BUFFER_SELECT flag to be set and no .B MSG_WAITALL flag to be set. Therefore each CQE will take a buffer out of a provided buffer pool for receiving. The application should check the flags of each CQE, regardless of its result. If a posted CQE does not have the .B IORING_CQE_F_MORE flag set then the multishot receive will be done and the application should issue a new request. Multishot variants are available since kernel 6.0. After calling this function, additional io_uring internal modifier flags may be set in the SQE .I ioprio field. The following flags are supported: .TP .B IORING_RECVSEND_POLL_FIRST If set, io_uring will assume the socket is currently empty and attempting to receive data will be unsuccessful. For this case, io_uring will arm internal poll and trigger a receive of the data when the socket has data to be read. This initial receive attempt can be wasteful for the case where the socket is expected to be empty, setting this flag will bypass the initial receive attempt and go straight to arming poll. If poll does indicate that data is ready to be received, the operation will proceed. Can be used with the CQE .B IORING_CQE_F_SOCK_NONEMPTY flag, which io_uring will set on CQEs after a .BR recv (2) or .BR recvmsg (2) operation. If set, the socket still had data to be read after the operation completed. Both these flags are available since 5.19. .P .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. 
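.SH EXAMPLE
.PP
A minimal single-shot sketch: one end of a socketpair sends a few bytes synchronously so the async receive has something to complete with. The payload is arbitrary and error handling is abbreviated.
.PP
.in +4n
.EX
#include <sys/socket.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        int sv[2];
        char buf[64];

        if (io_uring_queue_init(4, &ring, 0) ||
            socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
                return 1;
        /* make sure there is something to receive */
        send(sv[0], "hello", 5, 0);

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_recv(sqe, sv[1], buf, sizeof(buf), 0);
        io_uring_submit(&ring);

        /* cqe->res is the number of bytes received, or -errno */
        if (!io_uring_wait_cqe(&ring, &cqe))
                io_uring_cqe_seen(&ring, cqe);
        io_uring_queue_exit(&ring);
        return 0;
}
.EE
.in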
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR recv (2) liburing-2.6/man/io_uring_prep_recv_multishot.3000077700000000000000000000000001461424365000257672io_uring_prep_recv.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_recvmsg.3000066400000000000000000000077011461424365000204340ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_recvmsg 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_recvmsg \- prepare a recvmsg request .SH SYNOPSIS .nf .B #include .B #include .B #include .PP .BI "void io_uring_prep_recvmsg(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " struct msghdr *" msg "," .BI " unsigned " flags ");" .PP .BI "void io_uring_prep_recvmsg_multishot(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " struct msghdr *" msg "," .BI " unsigned " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_recvmsg (3) function prepares a recvmsg request. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start receiving the data indicated by .I msg with the .BR recvmsg (2) defined flags in the .I flags argument. This function prepares an async .BR recvmsg (2) request. See that man page for details on the arguments specified to this prep helper. The multishot version allows the application to issue a single receive request, which repeatedly posts a CQE when data is available. It requires the .B IOSQE_BUFFER_SELECT flag to be set and no .B MSG_WAITALL flag to be set. Therefore each CQE will take a buffer out of a provided buffer pool for receiving. The application should check the flags of each CQE, regardless of its result. If a posted CQE does not have the .B IORING_CQE_F_MORE flag set then the multishot receive will be done and the application should issue a new request. Unlike .BR recvmsg (2), multishot recvmsg will prepend a .I struct io_uring_recvmsg_out which describes the layout of the rest of the buffer in combination with the initial .I struct msghdr submitted with the request. See .BR io_uring_recvmsg_out (3) for more information on accessing the data. Multishot variants are available since kernel 6.0. After calling this function, additional io_uring internal modifier flags may be set in the SQE .I ioprio field. The following flags are supported: .TP .B IORING_RECVSEND_POLL_FIRST If set, io_uring will assume the socket is currently empty and attempting to receive data will be unsuccessful. For this case, io_uring will arm internal poll and trigger a receive of the data when the socket has data to be read. This initial receive attempt can be wasteful for the case where the socket is expected to be empty, setting this flag will bypass the initial receive attempt and go straight to arming poll. If poll does indicate that data is ready to be received, the operation will proceed. Can be used with the CQE .B IORING_CQE_F_SOCK_NONEMPTY flag, which io_uring will set on CQEs after a .BR recv (2) or .BR recvmsg (2) operation. If set, the socket still had data to be read after the operation completed. Both these flags are available since 5.19. .P .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. 
.SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR recvmsg (2) liburing-2.6/man/io_uring_prep_recvmsg_multishot.3000077700000000000000000000000001461424365000272052io_uring_prep_recvmsg.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_remove_buffers.3000066400000000000000000000024311461424365000217720ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_remove_buffers 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_remove_buffers \- prepare a remove buffers request .SH SYNOPSIS .nf .B #include .PP .BI "void io_uring_prep_remove_buffers(struct io_uring_sqe *" sqe "," .BI " int " nr "," .BI " int " bgid ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_remove_buffers (3) function prepares a request for removing previously supplied buffers. The submission queue entry .I sqe is setup to remove .I nr number of buffers from the buffer group ID indicated by .IR bgid . .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. On success, .I res will contain the number of successfully removed buffers. On error, the following errors can occur. .TP .B -ENOMEM The kernel was unable to allocate memory for the request. .TP .B -EINVAL One of the fields set in the SQE was invalid. .TP .B -ENOENT No buffers exist at the specified .I bgid buffer group ID. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2), .BR io_uring_prep_provide_buffers (3) liburing-2.6/man/io_uring_prep_rename.3000077700000000000000000000000001461424365000250242io_uring_prep_renameat.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_renameat.3000066400000000000000000000053471461424365000205660ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_renameat 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_renameat \- prepare a renameat request .SH SYNOPSIS .nf .B #include .B #include .B #include .PP .BI "void io_uring_prep_renameat(struct io_uring_sqe *" sqe "," .BI " int " olddirfd "," .BI " const char *" oldpath "," .BI " int " newdirfd "," .BI " const char *" newpath "," .BI " unsigned int " flags ");" .PP .BI "void io_uring_prep_rename(struct io_uring_sqe *" sqe "," .BI " const char *" oldpath "," .BI " const char *" newpath "," .BI " unsigned int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_renameat (3) function prepares a renameat request. The submission queue entry .I sqe is setup to use the old directory file descriptor pointed to by .I olddirfd and old path pointed to by .I oldpath with the new directory file descriptor pointed to by .I newdirfd and the new path pointed to by .I newpath and using the specified flags in .IR flags . The .BR io_uring_prep_rename (3) function prepares a rename request. 
The submission queue entry .I sqe is setup to use the old path pointed to by .I oldpath with the new path pointed to by .IR newpath , both relative to the current working directory and using the specified flags in .IR flags . These functions prepare an async .BR renameat2 (2) or .BR rename (2) request. If .I flags is zero, then this call is similar to the .BR renameat (2) system call. See those man pages for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR renameat (2), .BR renameat2 (2), .BR rename (2) liburing-2.6/man/io_uring_prep_send.3000066400000000000000000000045101461424365000177120ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_send 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_send \- prepare a send request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_send(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " const void *" buf "," .BI " size_t " len "," .BI " int " flags ");" .PP .BI "void io_uring_prep_sendto(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " const void *" buf "," .BI " size_t " len "," .BI " int " flags "," .BI " const struct sockaddr *" addr "," .BI " socklen_t " addrlen ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_send (3) function prepares a send request. The submission queue entry .I sqe is setup to use the file descriptor .I sockfd to start sending the data from .I buf of size .I len bytes and with modifier flags .IR flags . Note that using .B IOSQE_IO_LINK with this request type requires the setting of .B MSG_WAITALL in the .IR flags argument, as a short send isn't considered an error condition without that being set. This function prepares an async .BR send (2) request. See that man page for details. The .BR io_uring_prep_sendto (3) function prepares a sendto request. The submission queue entry .I sqe is setup to use the file descriptor .I sockfd to start sending the data from .I buf of size .I len bytes and with modifier flags .IR flags . The destination address is specified by .I addr and .I addrlen and must be a valid address for the socket type. This function prepares an async .BR sendto (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field.
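.SH EXAMPLE
.PP
A minimal sketch, assuming
.I ring
is an initialized ring and
.I sockfd
is a connected socket; error handling is omitted:
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
static const char data[] = "ping";

sqe = io_uring_get_sqe(&ring);
io_uring_prep_send(sqe, sockfd, data, sizeof(data) - 1, 0);

io_uring_submit(&ring);
io_uring_wait_cqe(&ring, &cqe);
/* cqe->res holds the number of bytes sent, or a negated errno */
io_uring_cqe_seen(&ring, cqe);
.EE
.in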
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR send (2), .BR sendto (2) liburing-2.6/man/io_uring_prep_send_set_addr.3000066400000000000000000000017561461424365000215680ustar00rootroot00000000000000.\" Copyright (C) 2023 Rutvik Patel .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_send_set_addr 3 "January 23, 2023" "liburing-2.4" "liburing Manual" .SH NAME io_uring_prep_send_set_addr \- set address details for send requests .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_send_set_addr(struct io_uring_sqe *" sqe "," .BI " const struct sockaddr *" dest_addr "," .BI " __u16 " addr_len ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_send_set_addr (3) function sets a socket destination address specified by .I dest_addr and its length using .I addr_len parameters. It can be used once .I sqe is prepared using any of the .BR send (2) io_uring helpers. See man pages of .BR io_uring_prep_send (3) or .BR io_uring_prep_send_zc (3). .SH RETURN VALUE None .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_prep_send (3), .BR io_uring_prep_send_zc (3), .BR send (2) liburing-2.6/man/io_uring_prep_send_zc.3000066400000000000000000000050701461424365000204100ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_send_zc 3 "September 6, 2022" "liburing-2.3" "liburing Manual" .SH NAME io_uring_prep_send_zc \- prepare a zerocopy send request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_send_zc(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " const void *" buf "," .BI " size_t " len "," .BI " int " flags "," .BI " unsigned " zc_flags ");" .PP .BI "void io_uring_prep_send_zc_fixed(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " const void *" buf "," .BI " size_t " len "," .BI " int " flags "," .BI " unsigned " zc_flags "," .BI " unsigned " buf_index ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_send_zc (3) function prepares a zerocopy send request. The submission queue entry .I sqe is setup to use the file descriptor .I sockfd to start sending the data from .I buf of size .I len bytes with send modifier flags .I flags and zerocopy modifier flags .IR zc_flags . The .BR io_uring_prep_send_zc_fixed (3) works just like .BR io_uring_prep_send_zc (3) except it requires the use of buffers that have been registered with .BR io_uring_register_buffers (3). The .I buf and .I len arguments must fall within a region specified by .I buf_index in the previously registered buffer. The buffer need not be aligned with the start of the registered buffer. Note that using .B IOSQE_IO_LINK with this request type requires the setting of .B MSG_WAITALL in the .I flags argument, as a short send isn't considered an error condition without that being set. These functions prepare an async zerocopy .BR send (2) request. See that man page for details. For details on the zerocopy nature of these requests, see .BR io_uring_enter (2). .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field.
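.SH EXAMPLE
.PP
A minimal zerocopy send sketch, assuming
.I ring
is an initialized ring,
.I sockfd
is a connected socket, and
.I buf
and
.I len
describe application data that stays allocated for the duration. A zerocopy send normally posts two CQEs: the send result first, with
.B IORING_CQE_F_MORE
set, and a notification CQE once the kernel is done with the buffer.
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;

sqe = io_uring_get_sqe(&ring);
io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);

io_uring_submit(&ring);

/* first CQE: the send result, IORING_CQE_F_MORE signals that a
   notification CQE will follow */
io_uring_wait_cqe(&ring, &cqe);
io_uring_cqe_seen(&ring, cqe);

/* second CQE: the kernel no longer needs the buffer */
io_uring_wait_cqe(&ring, &cqe);
io_uring_cqe_seen(&ring, cqe);
.EE
.in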
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_prep_send (3), .BR io_uring_enter (2), .BR send (2) liburing-2.6/man/io_uring_prep_send_zc_fixed.3000077700000000000000000000000001461424365000262122io_uring_prep_send_zc.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_sendmsg.3000066400000000000000000000046421461424365000204270ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_sendmsg 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_sendmsg \- prepare a sendmsg request .SH SYNOPSIS .nf .B #include <sys/types.h> .B #include <sys/socket.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_sendmsg(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " const struct msghdr *" msg "," .BI " unsigned " flags ");" .PP .BI "void io_uring_prep_sendmsg_zc(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " const struct msghdr *" msg "," .BI " unsigned " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_sendmsg (3) function prepares a sendmsg request. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start sending the data indicated by .I msg with the .BR sendmsg (2) defined flags in the .I flags argument. The .BR io_uring_prep_sendmsg_zc (3) accepts the same parameters as .BR io_uring_prep_sendmsg (3) but prepares a zerocopy sendmsg request. Note that using .B IOSQE_IO_LINK with this request type requires the setting of .B MSG_WAITALL in the .I flags argument, as a short send isn't considered an error condition without that being set. This function prepares an async .BR sendmsg (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3).
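.SH EXAMPLE
.PP
A minimal sketch, assuming
.I ring
is an initialized ring and
.I sockfd
is a connected socket; error handling is omitted:
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
char data[] = "ping";
struct iovec iov = {
    .iov_base = data,
    .iov_len = sizeof(data) - 1,
};
struct msghdr msg = { 0 };

msg.msg_iov = &iov;
msg.msg_iovlen = 1;

sqe = io_uring_get_sqe(&ring);
io_uring_prep_sendmsg(sqe, sockfd, &msg, 0);

io_uring_submit(&ring);
io_uring_wait_cqe(&ring, &cqe);
/* cqe->res holds the number of bytes sent, or a negated errno */
io_uring_cqe_seen(&ring, cqe);
.EE
.in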
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR sendmsg (2) liburing-2.6/man/io_uring_prep_sendmsg_zc.3000077700000000000000000000000001461424365000255552io_uring_prep_sendmsg.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_sendto.3000077700000000000000000000000001461424365000242062io_uring_prep_send.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_setxattr.3000066400000000000000000000033521461424365000206420ustar00rootroot00000000000000.\" Copyright (C) 2023 Rutvik Patel .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_setxattr 3 "January 23, 2023" "liburing-2.4" "liburing Manual" .SH NAME io_uring_prep_setxattr, io_uring_prep_fsetxattr \- prepare a request to set an extended attribute value .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_setxattr(struct io_uring_sqe *" sqe "," .BI " const char *" name "," .BI " const char *" value "," .BI " const char *" path "," .BI " int " flags "," .BI " unsigned int " len ");" .PP .BI "void io_uring_prep_fsetxattr(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " const char *" name "," .BI " const char *" value "," .BI " int " flags "," .BI " unsigned int " len ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_setxattr (3) function prepares a request to set an extended attribute value. The submission queue entry .I sqe is setup to set the .I value of the extended attribute identified by .I name and associated with the given .I path in the filesystem with modifier flags .IR flags . The .I len argument specifies the size (in bytes) of .IR value . .BR io_uring_prep_fsetxattr (3) is identical to .BR io_uring_prep_setxattr (3), only the extended attribute is set on the open file referred to by .I fd in place of .IR path . This function prepares an async .BR setxattr (2) request. See that man page for details. .SH RETURN VALUE None .SH SEE ALSO .BR io_uring_get_sqe (3), .BR setxattr (2) liburing-2.6/man/io_uring_prep_shutdown.3000066400000000000000000000023471461424365000206420ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_shutdown 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_shutdown \- prepare a shutdown request .SH SYNOPSIS .nf .B #include <sys/socket.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_shutdown(struct io_uring_sqe *" sqe "," .BI " int " sockfd "," .BI " int " how ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_shutdown (3) function prepares a shutdown request. The submission queue entry .I sqe is setup to use the file descriptor .I sockfd that should be shut down with the .I how argument. This function prepares an async .BR shutdown (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field.
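.SH EXAMPLE
.PP
A minimal sketch that shuts down the write side of a connected socket, assuming
.I ring
and
.I sockfd
are set up by the application:
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;

sqe = io_uring_get_sqe(&ring);
/* no further transmissions; SHUT_WR is from sys/socket.h */
io_uring_prep_shutdown(sqe, sockfd, SHUT_WR);
io_uring_submit(&ring);
.EE
.in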
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR shutdown (2) liburing-2.6/man/io_uring_prep_socket.3000066400000000000000000000071021461424365000202510ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_socket 3 "May 27, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_socket \- prepare a socket creation request .SH SYNOPSIS .nf .B #include <sys/socket.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_socket(struct io_uring_sqe *" sqe "," .BI " int " domain "," .BI " int " type "," .BI " int " protocol "," .BI " unsigned int " flags ");" .PP .BI "void io_uring_prep_socket_direct(struct io_uring_sqe *" sqe "," .BI " int " domain "," .BI " int " type "," .BI " int " protocol "," .BI " unsigned int " file_index "," .BI " unsigned int " flags ");" .PP .BI "void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *" sqe "," .BI " int " domain "," .BI " int " type "," .BI " int " protocol "," .BI " unsigned int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_socket (3) function prepares a socket creation request. The submission queue entry .I sqe is setup to use the communication domain defined by .I domain and use the communication type defined by .I type and the protocol set by .IR protocol . The .I flags argument is currently unused. The .BR io_uring_prep_socket_direct (3) helper works just like .BR io_uring_prep_socket (3), except it maps the socket to a direct descriptor rather than returning a normal file descriptor. The .I file_index argument should be set to the slot that should be used for this socket. The .BR io_uring_prep_socket_direct_alloc (3) helper works just like .BR io_uring_prep_socket_direct (3), except it allocates a new direct descriptor rather than passing in a free slot. It is equivalent to using .BR io_uring_prep_socket_direct (3) with .B IORING_FILE_INDEX_ALLOC as the .I file_index . Upon completion, the .I res field of the CQE will return the direct slot that was allocated for the socket. If the direct variants are used, the application must first have registered a file table using .BR io_uring_register_files (3) of the appropriate size. Once registered, a direct socket request may use any entry in that table, as long as it is within the size of the registered table. If a specified entry already contains a file, the file will first be removed from the table and closed. It's consistent with the behavior of updating an existing file with .BR io_uring_register_files_update (3). For a direct descriptor socket request, the .I file_index argument can be set to .BR IORING_FILE_INDEX_ALLOC . In this case, a free entry in the io_uring file table will be used automatically and the file index will be returned as CQE .IR res . .B -ENFILE is otherwise returned if there are no free entries in the io_uring file table. These functions prepare an async .BR socket (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field.
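.SH EXAMPLE
.PP
A minimal sketch creating a TCP socket, assuming
.I ring
is an initialized ring; error handling is omitted:
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
int sockfd;

sqe = io_uring_get_sqe(&ring);
io_uring_prep_socket(sqe, AF_INET, SOCK_STREAM, 0, 0);

io_uring_submit(&ring);
io_uring_wait_cqe(&ring, &cqe);
/* on success, cqe->res is the new file descriptor */
sockfd = cqe->res;
io_uring_cqe_seen(&ring, cqe);
.EE
.in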
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR socket (2) liburing-2.6/man/io_uring_prep_socket_direct.3000077700000000000000000000000001461424365000260732io_uring_prep_socket.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_socket_direct_alloc.3000077700000000000000000000000001461424365000272452io_uring_prep_socket.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_splice.3000066400000000000000000000051171461424365000202440ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_splice 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_splice \- prepare a splice request .SH SYNOPSIS .nf .B #include <fcntl.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_splice(struct io_uring_sqe *" sqe "," .BI " int " fd_in "," .BI " int64_t " off_in "," .BI " int " fd_out "," .BI " int64_t " off_out "," .BI " unsigned int " nbytes "," .BI " unsigned int " splice_flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_splice (3) function prepares a splice request. The submission queue entry .I sqe is setup to use as input the file descriptor .I fd_in at offset .IR off_in , splicing data to the file descriptor at .I fd_out and at offset .IR off_out . .I nbytes bytes of data should be spliced between the two descriptors. .I splice_flags are modifier flags for the operation. See .BR splice (2) for the generic splice flags. If .I fd_out is a registered file descriptor, .B IOSQE_FIXED_FILE can be set in the SQE to indicate that. For the input file, the io_uring specific .B SPLICE_F_FD_IN_FIXED can be set in .I splice_flags and .I fd_in given as a registered file descriptor offset. If .I fd_in refers to a pipe, .I off_in is ignored and must be set to -1. If .I fd_in does not refer to a pipe and .I off_in is -1, then .I nbytes are read from .I fd_in starting from the file offset, which is incremented by the number of bytes read. If .I fd_in does not refer to a pipe and .I off_in is not -1, then the starting offset of .I fd_in will be .IR off_in . The same rules apply to .I fd_out and .IR off_out . This function prepares an async .BR splice (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2), .BR splice (2) .SH NOTES Note that even if .I fd_in or .I fd_out refers to a pipe, the splice operation can still fail with .B EINVAL if one of the file descriptors does not explicitly support the splice operation, e.g. reading from a terminal is unsupported from kernel 5.7 to 5.11.
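.SH EXAMPLE
.PP
A minimal sketch that splices up to 4096 bytes from the read end of a pipe to the start of a regular file. It assumes
.IR ring ,
.IR pipefd ,
and
.I outfd
are set up by the application. Since
.I fd_in
refers to a pipe, its offset is passed as -1.
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;

sqe = io_uring_get_sqe(&ring);
io_uring_prep_splice(sqe, pipefd[0], -1, outfd, 0, 4096, 0);
io_uring_submit(&ring);
.EE
.in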
liburing-2.6/man/io_uring_prep_statx.3000066400000000000000000000040671461424365000201330ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_statx 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_statx \- prepare a statx request .SH SYNOPSIS .nf .B #include <fcntl.h> .B #include <sys/types.h> .B #include <sys/stat.h> .B #include <unistd.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_statx(struct io_uring_sqe *" sqe "," .BI " int " dirfd "," .BI " const char *" path "," .BI " int " flags "," .BI " unsigned " mask "," .BI " struct statx *" statxbuf ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_statx (3) function prepares a statx request. The submission queue entry .I sqe is setup to use the directory file descriptor pointed to by .I dirfd to start a statx operation on the path identified by .I path and using the flags given in .I flags for the fields specified by .I mask and into the buffer located at .IR statxbuf . This function prepares an async .BR statx (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR statx (2) liburing-2.6/man/io_uring_prep_symlink.3000077700000000000000000000000001461424365000254622io_uring_prep_symlinkat.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_symlinkat.3000066400000000000000000000046241461424365000210020ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_symlinkat 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_symlinkat \- prepare a symlinkat request .SH SYNOPSIS .nf .B #include <fcntl.h> .B #include <unistd.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_symlinkat(struct io_uring_sqe *" sqe "," .BI " const char *" target "," .BI " int " newdirfd "," .BI " const char *" linkpath ");" .PP .BI "void io_uring_prep_symlink(struct io_uring_sqe *" sqe "," .BI " const char *" target "," .BI " const char *" linkpath ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_symlinkat (3) function prepares a symlinkat request. The submission queue entry .I sqe is setup to symlink the target path pointed to by .I target to the new destination indicated by .I newdirfd and .IR linkpath . The .BR io_uring_prep_symlink (3) function prepares a symlink request. The submission queue entry .I sqe is setup to symlink the target path pointed to by .I target to the new destination indicated by .I linkpath relative to the current working directory. These functions prepare an async .BR symlinkat (2) or .BR symlink (2) request.
See those man pages for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR symlinkat (2), .BR symlink (2) liburing-2.6/man/io_uring_prep_sync_file_range.3000066400000000000000000000027511461424365000221150ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_sync_file_range 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_sync_file_range \- prepare a sync_file_range request .SH SYNOPSIS .nf .B #include <fcntl.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_sync_file_range(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " unsigned " len "," .BI " __u64 " offset "," .BI " int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_sync_file_range (3) function prepares a sync_file_range request. The submission queue entry .I sqe is setup to use the file descriptor .I fd that should get .I len bytes synced starting at offset .I offset and with modifier flags in the .I flags argument. This function prepares an async .BR sync_file_range (2) request. See that man page for details on the arguments. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR sync_file_range (2) liburing-2.6/man/io_uring_prep_tee.3000066400000000000000000000033021461424365000175360ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_tee 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_tee \- prepare a tee request .SH SYNOPSIS .nf .B #include <fcntl.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_tee(struct io_uring_sqe *" sqe "," .BI " int " fd_in "," .BI " int " fd_out "," .BI " unsigned int " nbytes "," .BI " unsigned int " splice_flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_tee (3) function prepares a tee request. The submission queue entry .I sqe is setup to use as input the file descriptor .I fd_in and as output the file descriptor .I fd_out duplicating .I nbytes bytes worth of data. .I splice_flags are modifier flags for the operation. See .BR tee (2) for the generic splice flags. If .I fd_out is a registered file descriptor, .B IOSQE_FIXED_FILE can be set in the SQE to indicate that.
For the input file, the io_uring specific .B SPLICE_F_FD_IN_FIXED can be set and .I fd_in given as a registered file descriptor offset. This function prepares an async .BR tee (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_register (2), .BR splice (2), .BR tee (2) liburing-2.6/man/io_uring_prep_timeout.3000066400000000000000000000057511461424365000204570ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_timeout 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_timeout \- prepare a timeout request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_timeout(struct io_uring_sqe *" sqe "," .BI " struct __kernel_timespec *" ts "," .BI " unsigned " count "," .BI " unsigned " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_timeout (3) function prepares a timeout request. The submission queue entry .I sqe is setup to arm a timeout specified by .I ts and with a timeout count of .I count completion entries. The .I flags argument holds modifier flags for the request. This request type can be used as a timeout waking anyone sleeping for events on the CQ ring. The .I flags argument may contain: .TP .B IORING_TIMEOUT_ABS The value specified in .I ts is an absolute value rather than a relative one. .TP .B IORING_TIMEOUT_BOOTTIME The boottime clock source should be used. .TP .B IORING_TIMEOUT_REALTIME The realtime clock source should be used. .TP .B IORING_TIMEOUT_ETIME_SUCCESS Consider an expired timeout a success in terms of the posted completion. Normally a timeout that triggers would return .B -ETIME in the CQE .I res value. .TP .B IORING_TIMEOUT_MULTISHOT The request will return multiple timeout completions. The completion flag IORING_CQE_F_MORE is set if more timeouts are expected. The value specified in .I count is the number of repeats. A value of 0 means the timeout is indefinite and can only be stopped by a removal request. Available since the 6.4 kernel. .PP The timeout completion event will trigger if either the specified timeout has occurred, or the specified number of events to wait for have been posted to the CQ ring. .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. On success, .B 0 is returned. .TP .B -ETIME The specified timeout occurred and triggered the completion event. .TP .B -EINVAL One of the fields set in the SQE was invalid. For example, two clocksources were given, or the specified timeout seconds or nanoseconds were < 0. .TP .B -EFAULT io_uring was unable to access the data specified by .IR ts . .TP .B -ECANCELED The timeout was canceled by a removal request. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred.
Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_prep_timeout_remove (3), .BR io_uring_prep_timeout_update (3) liburing-2.6/man/io_uring_prep_timeout_remove.3000077700000000000000000000000001461424365000300542io_uring_prep_timeout_update.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_timeout_update.3000066400000000000000000000055701461424365000220200ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_timeout_update 3 "March 12, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_timeout_update \- prepare a request to update an existing timeout .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_timeout_update(struct io_uring_sqe *" sqe "," .BI " struct __kernel_timespec *" ts "," .BI " __u64 " user_data "," .BI " unsigned " flags ");" .PP .BI "void io_uring_prep_timeout_remove(struct io_uring_sqe *" sqe "," .BI " __u64 " user_data "," .BI " unsigned " flags ");" .fi .SH DESCRIPTION .PP These functions modify or cancel an existing timeout request. The submission queue entry .I sqe is setup to arm a timeout update or removal specified by .I user_data and with modifier flags given by .IR flags . Additionally, the update request includes a .I ts structure, which contains new timeout information. For an update request, the .I flags member may contain a bitmask of the following values: .TP .B IORING_TIMEOUT_ABS The value specified in .I ts is an absolute value rather than a relative one. .TP .B IORING_TIMEOUT_BOOTTIME The boottime clock source should be used. .TP .B IORING_TIMEOUT_REALTIME The realtime clock source should be used. .TP .B IORING_TIMEOUT_ETIME_SUCCESS Consider an expired timeout a success in terms of the posted completion. Normally a timeout that triggers would return .B -ETIME in the CQE .I res value. .PP The timeout remove command does not currently accept any flags. .SH RETURN VALUE None .SH ERRORS These are the errors that are reported in the CQE .I res field. On success, .B 0 is returned. .TP .B -ENOENT The timeout identified by .I user_data could not be found. It may be invalid, or triggered before the update or removal request was processed. .TP .B -EALREADY The timeout identified by .I user_data is already firing and cannot be canceled. .TP .B -EINVAL One of the fields set in the SQE was invalid. For example, two clocksources were given, or the specified timeout seconds or nanoseconds were < 0. .TP .B -EFAULT io_uring was unable to access the data specified by .IR ts . .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3).
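.SH EXAMPLE
.PP
A minimal sketch that arms a timeout and later extends it, assuming
.I ring
is an initialized ring; error handling is omitted. The
.I user_data
value 0x1234 is an arbitrary tag chosen for illustration.
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;
struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
struct __kernel_timespec new_ts = { .tv_sec = 5, .tv_nsec = 0 };

/* arm a 1 second timeout, tagged so it can be found later */
sqe = io_uring_get_sqe(&ring);
io_uring_prep_timeout(sqe, &ts, 0, 0);
io_uring_sqe_set_data64(sqe, 0x1234);

/* push the same timeout out to 5 seconds */
sqe = io_uring_get_sqe(&ring);
io_uring_prep_timeout_update(sqe, &new_ts, 0x1234, 0);

io_uring_submit(&ring);
.EE
.in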
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_prep_timeout (3) liburing-2.6/man/io_uring_prep_unlink.3000077700000000000000000000000001461424365000251062io_uring_prep_unlinkat.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_prep_unlinkat.3000066400000000000000000000045021461424365000206070ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_unlinkat 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_prep_unlinkat \- prepare an unlinkat request .SH SYNOPSIS .nf .B #include <fcntl.h> .B #include <unistd.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_unlinkat(struct io_uring_sqe *" sqe "," .BI " int " dirfd "," .BI " const char *" path "," .BI " int " flags ");" .PP .BI "void io_uring_prep_unlink(struct io_uring_sqe *" sqe "," .BI " const char *" path "," .BI " int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_unlinkat (3) function prepares an unlinkat request. The submission queue entry .I sqe is setup to use the directory file descriptor pointed to by .I dirfd to start an unlinkat operation on the path identified by .I path and using the flags given in .IR flags . The .BR io_uring_prep_unlink (3) function prepares an unlink request. The submission queue entry .I sqe is setup to start an unlinkat operation on the path identified by .I path relative to the current working directory and using the flags given in .IR flags . These functions prepare an async .BR unlinkat (2) or .BR unlink (2) request. See those man pages for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR unlinkat (2), .BR unlink (2) liburing-2.6/man/io_uring_prep_waitid.3000066400000000000000000000031371461424365000202460ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_waitid 3 "July 14, 2023" "liburing-2.5" "liburing Manual" .SH NAME io_uring_prep_waitid \- prepare a waitid request .SH SYNOPSIS .nf .B #include <sys/wait.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_waitid(struct io_uring_sqe *" sqe "," .BI " idtype_t " idtype "," .BI " id_t " id "," .BI " siginfo_t *" infop "," .BI " int " options "," .BI " unsigned int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_waitid (3) function prepares a waitid request. The submission queue entry .I sqe is setup to use the .I idtype and .I id arguments to select the child(ren), and .I options to specify the child state changes to wait for. Upon successful return, it fills .I infop with information about the child process, if any. The .I flags argument holds io_uring specific modifier flags.
They are currently unused, and hence .B 0 should be passed. This function prepares an async .BR waitid (2) request. See that man page for details. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR waitid (2) liburing-2.6/man/io_uring_prep_write.3000066400000000000000000000035301461424365000201140ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_write 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_prep_write \- prepare I/O write request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_write(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " const void *" buf "," .BI " unsigned " nbytes "," .BI " __u64 " offset ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_write (3) prepares an IO write request. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start writing .I nbytes from the buffer .I buf at the specified .IR offset . On files that support seeking, if the offset is set to .BR -1 , the write operation commences at the file offset, and the file offset is incremented by the number of bytes written. See .BR write (2) for more details. Note that for an async API, reading and updating the current file offset may result in unpredictable behavior, unless access to the file is serialized. It is not encouraged to use this feature if it's possible to provide the desired IO offset from the application or library. On files that are not capable of seeking, the offset must be 0 or -1. After the write has been prepared, it can be submitted with one of the submit functions. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_prep_write_fixed.3000066400000000000000000000035141461424365000212750ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_write_fixed 3 "February 13, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_prep_write_fixed \- prepare I/O write request with registered buffer .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_prep_write_fixed(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " const void *" buf "," .BI " unsigned " nbytes "," .BI " __u64 " offset "," .BI " int " buf_index ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_write_fixed (3) prepares an IO write request with a previously registered IO buffer. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start writing .I nbytes from the buffer .I buf at the specified .I offset and with the buffer matching the registered index of .IR buf_index .
This works just like .BR io_uring_prep_write (3) except it requires the use of buffers that have been registered with .BR io_uring_register_buffers (3). The .I buf and .I nbytes arguments must fall within a region specified by .I buf_index in the previously registered buffer. The buffer need not be aligned with the start of the registered buffer. After the write has been prepared it can be submitted with one of the submit functions. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH SEE ALSO .BR io_uring_prep_write (3), .BR io_uring_register_buffers (3) liburing-2.6/man/io_uring_prep_writev.3000066400000000000000000000052271461424365000203070ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_writev 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_prep_writev \- prepare vector I/O write request .SH SYNOPSIS .nf .B #include <sys/uio.h> .B #include <liburing.h> .PP .BI "void io_uring_prep_writev(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " const struct iovec *" iovecs "," .BI " unsigned " nr_vecs "," .BI " __u64 " offset ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_writev (3) prepares a vectored IO write request. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start writing .I nr_vecs from the .I iovecs array at the specified .IR offset . On files that support seeking, if the offset is set to .BR -1 , the write operation commences at the file offset, and the file offset is incremented by the number of bytes written. See .BR write (2) for more details. Note that for an async API, reading and updating the current file offset may result in unpredictable behavior, unless access to the file is serialized. It is not encouraged to use this feature if it's possible to provide the desired IO offset from the application or library. On files that are not capable of seeking, the offset must be 0 or -1. After the write has been prepared it can be submitted with one of the submit functions. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES Unless an application explicitly needs to pass in more than one iovec, it is more efficient to use .BR io_uring_prep_write (3) rather than this function, as no state has to be maintained for a non-vectored IO request. As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3).
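.SH EXAMPLE
.PP
A minimal sketch writing two buffers with one request. It assumes
.I ring
and
.I fd
are set up by the application and that
.IR hdr ,
.IR hdr_len ,
.IR body ,
and
.I body_len
describe buffers it owns; the offset of -1 means the write happens at the current file offset.
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;
struct iovec iov[2] = {
    { .iov_base = hdr,  .iov_len = hdr_len },
    { .iov_base = body, .iov_len = body_len },
};

sqe = io_uring_get_sqe(&ring);
io_uring_prep_writev(sqe, fd, iov, 2, -1);
io_uring_submit(&ring);
.EE
.in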
.SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_prep_write (3), .BR io_uring_prep_writev2 (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_prep_writev2.3000066400000000000000000000060531461424365000203670ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_prep_writev2 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_prep_writev2 \- prepare vector I/O write request with flags .SH SYNOPSIS .nf .B #include .B #include .PP .BI "void io_uring_prep_writev2(struct io_uring_sqe *" sqe "," .BI " int " fd "," .BI " const struct iovec *" iovecs "," .BI " unsigned " nr_vecs "," .BI " __u64 " offset "," .BI " int " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_prep_writev2 (3) prepares a vectored IO write request. The submission queue entry .I sqe is setup to use the file descriptor .I fd to start writing .I nr_vecs from the .I iovecs array at the specified .IR offset . The behavior of the function can be controlled with the .I flags parameter. Supported values for .I flags are: .TP .B RWF_HIPRI High priority request, poll if possible .TP .B RWF_DSYNC per-IO O_DSYNC .TP .B RWF_SYNC per-IO O_SYNC .TP .B RWF_NOWAIT per-IO, return .B -EAGAIN if operation would block .TP .B RWF_APPEND per-IO O_APPEND .P On files that support seeking, if the offset is set to .BR -1 , the write operation commences at the file offset, and the file offset is incremented by the number of bytes written. See .BR write (2) for more details. Note that for an async API, reading and updating the current file offset may result in unpredictable behavior, unless access to the file is serialized. It is not encouraged to use this feature if it's possible to provide the desired IO offset from the application or library. On files that are not capable of seeking, the offset must be 0 or -1. After the write has been prepared, it can be submitted with one of the submit functions. .SH RETURN VALUE None .SH ERRORS The CQE .I res field will contain the result of the operation. See the related man page for details on possible values. Note that where synchronous system calls will return .B -1 on failure and set .I errno to the actual error value, io_uring never uses .IR errno . Instead it returns the negated .I errno directly in the CQE .I res field. .SH NOTES Unless an application explicitly needs to pass in more than one iovec, it is more efficient to use .BR io_uring_prep_write (3) rather than this function, as no state has to be maintained for a non-vectored IO request. As with any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_prep_write (3), .BR io_uring_prep_writev (3), .BR io_uring_submit (3) liburing-2.6/man/io_uring_queue_exit.3000066400000000000000000000013741461424365000201150ustar00rootroot00000000000000.\" Copyright (C) 2020 Jens Axboe .\" Copyright (C) 2020 Red Hat, Inc. 
.\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_queue_exit 3 "July 10, 2020" "liburing-0.7" "liburing Manual" .SH NAME io_uring_queue_exit \- tear down io_uring submission and completion queues .SH SYNOPSIS .nf .B #include .PP .BI "void io_uring_queue_exit(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP .BR io_uring_queue_exit (3) will release all resources acquired and initialized by .BR io_uring_queue_init (3). It first unmaps the memory shared between the application and the kernel and then closes the io_uring file descriptor. .SH RETURN VALUE None .SH SEE ALSO .BR io_uring_setup (2), .BR mmap (2), .BR io_uring_queue_init (3) liburing-2.6/man/io_uring_queue_init.3000066400000000000000000000101201461424365000200740ustar00rootroot00000000000000.\" Copyright (C) 2020 Jens Axboe .\" Copyright (C) 2020 Red Hat, Inc. .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_queue_init 3 "July 10, 2020" "liburing-0.7" "liburing Manual" .SH NAME io_uring_queue_init \- setup io_uring submission and completion queues .SH SYNOPSIS .nf .B #include .PP .BI "int io_uring_queue_init(unsigned " entries "," .BI " struct io_uring *" ring "," .BI " unsigned " flags ");" .PP .BI "int io_uring_queue_init_params(unsigned " entries "," .BI " struct io_uring *" ring "," .BI " struct io_uring_params *" params ");" .PP .BI "int io_uring_queue_init_mem(unsigned " entries "," .BI " struct io_uring *" ring "," .BI " struct io_uring_params *" params "," .BI " void *" buf ", size_t " buf_size ");" .fi .SH DESCRIPTION .PP The .BR io_uring_queue_init (3) function executes the .BR io_uring_setup (2) system call to initialize the submission and completion queues in the kernel with at least .I entries entries in the submission queue and then maps the resulting file descriptor to memory shared between the application and the kernel. By default, the CQ ring will have twice the number of entries as specified by .I entries for the SQ ring. This is adequate for regular file or storage workloads, but may be too small for networked workloads. The SQ ring entries do not impose a limit on the number of in-flight requests that the ring can support, it merely limits the number that can be submitted to the kernel in one go (batch). If the CQ ring overflows, e.g. more entries are generated than fits in the ring before the application can reap them, then if the kernel supports .B IORING_FEAT_NODROP the ring enters a CQ ring overflow state. Otherwise it drops the CQEs and increments .I cq.koverflow in .I struct io_uring with the number of CQEs dropped. The overflow state is indicated by .B IORING_SQ_CQ_OVERFLOW being set in the SQ ring flags. Unless the kernel runs out of available memory, entries are not dropped, but it is a much slower completion path and will slow down request processing. For that reason it should be avoided and the CQ ring sized appropriately for the workload. Setting .I cq_entries in .I struct io_uring_params will tell the kernel to allocate this many entries for the CQ ring, independent of the SQ ring size in given in .IR entries . If the value isn't a power of 2, it will be rounded up to the nearest power of 2. On success, .BR io_uring_queue_init (3) returns 0 and .I ring will point to the shared memory containing the io_uring queues. On failure .BR -errno is returned. .I flags will be passed through to the io_uring_setup syscall (see .BR io_uring_setup (2)). 
The .BR io_uring_queue_init_params (3) and .BR io_uring_queue_init_mem (3) variants will pass the parameters indicated by .I params straight through to the .BR io_uring_setup (2) system call. The .BR io_uring_queue_init_mem (3) variant uses the provided .I buf with associated size .I buf_size as the memory for the ring, using the .B IORING_SETUP_NO_MMAP flag to .BR io_uring_setup (2). The buffer passed to .BR io_uring_queue_init_mem (3) must already be zeroed. Typically, the caller should allocate a huge page and pass that in to .BR io_uring_queue_init_mem (3). Pages allocated by mmap are already zeroed. .BR io_uring_queue_init_mem (3) returns the number of bytes used from the provided buffer, so that the app can reuse the buffer with the returned offset to put more rings in the same huge page. On success, the resources held by .I ring should be released via a corresponding call to .BR io_uring_queue_exit (3). .SH RETURN VALUE .BR io_uring_queue_init (3) and .BR io_uring_queue_init_params (3) return 0 on success and .BR -errno on failure. .BR io_uring_queue_init_mem (3) returns the number of bytes used from the provided buffer on success, and .BR -errno on failure. .SH SEE ALSO .BR io_uring_setup (2), .BR io_uring_register_ring_fd (3), .BR mmap (2), .BR io_uring_queue_exit (3) liburing-2.6/man/io_uring_queue_init_mem.3000077700000000000000000000000001461424365000250612io_uring_queue_init.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_queue_init_params.3000077700000000000000000000000001461424365000255662io_uring_queue_init.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_recvmsg_cmsg_firsthdr.3000077700000000000000000000000001461424365000266262io_uring_recvmsg_out.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_recvmsg_cmsg_nexthdr.3000077700000000000000000000000001461424365000264552io_uring_recvmsg_out.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_recvmsg_name.3000077700000000000000000000000001461424365000247102io_uring_recvmsg_out.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_recvmsg_out.3000066400000000000000000000054471461424365000203020ustar00rootroot00000000000000.\" Copyright (C), 2022 Dylan Yudaken .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_recvmsg_out 3 "July 26, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_recvmsg_out - access data from multishot recvmsg .SH SYNOPSIS .nf .B #include .PP .BI "struct io_uring_recvmsg_out *io_uring_recvmsg_validate(void *" buf "," .BI " int " buf_len "," .BI " struct msghdr *" msgh ");" .PP .BI "void *io_uring_recvmsg_name(struct io_uring_recvmsg_out *" o ");" .PP .BI "struct cmsghdr *io_uring_recvmsg_cmsg_firsthdr(struct io_uring_recvmsg_out *" o "," .BI " struct msghdr *" msgh ");" .BI "struct cmsghdr *io_uring_recvmsg_cmsg_nexthdr(struct io_uring_recvmsg_out *" o "," .BI " struct msghdr *" msgh "," .BI " struct cmsghdr *" cmsg ");" .PP .BI "void *io_uring_recvmsg_payload(struct io_uring_recvmsg_out *" o "," .BI " struct msghdr *" msgh ");" .BI "unsigned int io_uring_recvmsg_payload_length(struct io_uring_recvmsg_out *" o "," .BI " int " buf_len "," .BI " struct msghdr *" msgh ");" .PP .fi .SH DESCRIPTION These functions are used to access data in the payload delivered by .BR io_uring_prep_recvmsg_multishot (3). .PP .I msgh should point to the .I struct msghdr submitted with the request. 
.PP .BR io_uring_recvmsg_validate (3) will validate a buffer delivered by .BR io_uring_prep_recvmsg_multishot (3) and extract the .I io_uring_recvmsg_out if it is valid, returning a pointer to it or else NULL. .PP The structure is defined as follows: .PP .in +4n .EX struct io_uring_recvmsg_out { __u32 namelen; /* Name byte count as would have been populated * by recvmsg(2) */ __u32 controllen; /* Control byte count */ __u32 payloadlen; /* Payload byte count as would have been returned * by recvmsg(2) */ __u32 flags; /* Flags result as would have been populated * by recvmsg(2) */ }; .IP * 3 .BR io_uring_recvmsg_name (3) returns a pointer to the name in the buffer. .IP * .BR io_uring_recvmsg_cmsg_firsthdr (3) returns a pointer to the first cmsg in the buffer, or NULL. .IP * .BR io_uring_recvmsg_cmsg_nexthdr (3) returns a pointer to the next cmsg in the buffer, or NULL. .IP * .BR io_uring_recvmsg_payload (3) returns a pointer to the payload in the buffer. .IP * .BR io_uring_recvmsg_payload_length (3) Calculates the usable payload length in bytes. .SH "SEE ALSO" .BR io_uring_prep_recvmsg_multishot (3) liburing-2.6/man/io_uring_recvmsg_payload.3000077700000000000000000000000001461424365000254212io_uring_recvmsg_out.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_recvmsg_payload_length.3000077700000000000000000000000001461424365000267622io_uring_recvmsg_out.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_recvmsg_validate.3000077700000000000000000000000001461424365000255612io_uring_recvmsg_out.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_register.2000066400000000000000000000545451461424365000175730ustar00rootroot00000000000000.\" Copyright (C) 2019 Jens Axboe .\" Copyright (C) 2019 Red Hat, Inc. .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register 2 2019-01-17 "Linux" "Linux Programmer's Manual" .SH NAME io_uring_register \- register files or user buffers for asynchronous I/O .SH SYNOPSIS .nf .BR "#include " .PP .BI "int io_uring_register(unsigned int " fd ", unsigned int " opcode , .BI " void *" arg ", unsigned int " nr_args ); .fi .PP .SH DESCRIPTION .PP The .BR io_uring_register (2) system call registers resources (e.g. user buffers, files, eventfd, personality, restrictions) for use in an .BR io_uring (7) instance referenced by .IR fd . Registering files or user buffers allows the kernel to take long term references to internal data structures or create long term mappings of application memory, greatly reducing per-I/O overhead. .I fd is the file descriptor returned by a call to .BR io_uring_setup (2). If .I opcode has the flag .B IORING_REGISTER_USE_REGISTERED_RING ored into it, .I fd is instead the index of a registered ring fd. .I opcode can be one of: .TP .B IORING_REGISTER_BUFFERS .I arg points to a .I struct iovec array of .I nr_args entries. The buffers associated with the iovecs will be locked in memory and charged against the user's .B RLIMIT_MEMLOCK resource limit. See .BR getrlimit (2) for more information. Additionally, there is a size limit of 1GiB per buffer. Currently, the buffers must be anonymous, non-file-backed memory, such as that returned by .BR malloc (3) or .BR mmap (2) with the .B MAP_ANONYMOUS flag set. It is expected that this limitation will be lifted in the future. Huge pages are supported as well. Note that the entire huge page will be pinned in the kernel, even if only a portion of it is used. After a successful call, the supplied buffers are mapped into the kernel and eligible for I/O. 
To make use of them, the application must specify the .B IORING_OP_READ_FIXED or .B IORING_OP_WRITE_FIXED opcodes in the submission queue entry (see the .I struct io_uring_sqe definition in .BR io_uring_enter (2)), and set the .I buf_index field to the desired buffer index. The memory range described by the submission queue entry's .I addr and .I len fields must fall within the indexed buffer. It is perfectly valid to setup a large buffer and then only use part of it for an I/O, as long as the range is within the originally mapped region. An application can increase or decrease the size or number of registered buffers by first unregistering the existing buffers, and then issuing a new call to .BR io_uring_register (2) with the new buffers. Note that before 5.13 registering buffers would wait for the ring to idle. If the application currently has requests in-flight, the registration will wait for those to finish before proceeding. An application need not unregister buffers explicitly before shutting down the io_uring instance. Note, however, that shutdown processing may run asynchronously within the kernel. As a result, it is not guaranteed that pages are immediately unpinned in this case. Available since 5.1. .TP .B IORING_REGISTER_BUFFERS2 Register buffers for I/O. Similar to .B IORING_REGISTER_BUFFERS but aims to have a more extensible ABI. .I arg points to a .I struct io_uring_rsrc_register, and .I nr_args should be set to the number of bytes in the structure. .PP .in +8n .EX struct io_uring_rsrc_register { __u32 nr; __u32 resv; __u64 resv2; __aligned_u64 data; __aligned_u64 tags; }; .EE .in .PP .in +8n The .I data field contains a pointer to a .I struct iovec array of .I nr entries. The .I tags field should either be 0, then tagging is disabled, or point to an array of .I nr "tags" (unsigned 64 bit integers). If a tag is zero, then tagging for this particular resource (a buffer in this case) is disabled. Otherwise, after the resource had been unregistered and it's not used anymore, a CQE will be posted with .I user_data set to the specified tag and all other fields zeroed. Note that resource updates, e.g. .B IORING_REGISTER_BUFFERS_UPDATE, don't necessarily deallocate resources by the time it returns, but they might be held alive until all requests using it complete. Available since 5.13. .TP .B IORING_REGISTER_BUFFERS_UPDATE Updates registered buffers with new ones, either turning a sparse entry into a real one, or replacing an existing entry. .I arg must contain a pointer to a struct io_uring_rsrc_update2, which contains an offset on which to start the update, and an array of .I struct iovec. .I tags points to an array of tags. .I nr must contain the number of descriptors in the passed in arrays. See .B IORING_REGISTER_BUFFERS2 for the resource tagging description. .PP .in +8n .EX struct io_uring_rsrc_update2 { __u32 offset; __u32 resv; __aligned_u64 data; __aligned_u64 tags; __u32 nr; __u32 resv2; }; .EE .in .PP .in +8n Available since 5.13. .TP .B IORING_UNREGISTER_BUFFERS This operation takes no argument, and .I arg must be passed as NULL. All previously registered buffers associated with the io_uring instance will be released synchronously. Available since 5.1. .TP .B IORING_REGISTER_FILES Register files for I/O. .I arg contains a pointer to an array of .I nr_args file descriptors (signed 32 bit integers). 
To make use of the registered files, the .B IOSQE_FIXED_FILE flag must be set in the .I flags member of the .IR "struct io_uring_sqe" , and the .I fd member is set to the index of the file in the file descriptor array. The file set may be sparse, meaning that the .B fd field in the array may be set to .B -1. See .B IORING_REGISTER_FILES_UPDATE for how to update files in place. Note that before 5.13 registering files would wait for the ring to idle. If the application currently has requests in-flight, the registration will wait for those to finish before proceeding. See .B IORING_REGISTER_FILES_UPDATE for how to update an existing set without that limitation. Files are automatically unregistered when the io_uring instance is torn down. An application need only unregister if it wishes to register a new set of fds. Available since 5.1. .TP .B IORING_REGISTER_FILES2 Register files for I/O. Similar to .B IORING_REGISTER_FILES. .I arg points to a .I struct io_uring_rsrc_register, and .I nr_args should be set to the number of bytes in the structure. The .I data field contains a pointer to an array of .I nr file descriptors (signed 32 bit integers). The .I tags field should either be 0 or point to an array of .I nr "tags" (unsigned 64 bit integers). See .B IORING_REGISTER_BUFFERS2 for more info on resource tagging. Note that resource updates, e.g. .B IORING_REGISTER_FILES_UPDATE, don't necessarily deallocate resources; they might be held until all requests using that resource complete. Available since 5.13. .TP .B IORING_REGISTER_FILES_UPDATE This operation replaces existing files in the registered file set with new ones, either turning a sparse entry (one where fd is equal to .B -1 ) into a real one, removing an existing entry (new one is set to .B -1 ), or replacing an existing entry with a new one. .I arg must contain a pointer to a .I struct io_uring_files_update, which contains an offset on which to start the update, and an array of file descriptors to use for the update. .I nr_args must contain the number of descriptors in the passed in array. Available since 5.5. File descriptors can be skipped if they are set to .B IORING_REGISTER_FILES_SKIP. Skipping an fd will not touch the file associated with the previous fd at that index. Available since 5.12. .TP .B IORING_REGISTER_FILES_UPDATE2 Similar to IORING_REGISTER_FILES_UPDATE, replaces existing files in the registered file set with new ones, either turning a sparse entry (one where fd is equal to .B -1 ) into a real one, removing an existing entry (new one is set to .B -1 ), or replacing an existing entry with a new one. .I arg must contain a pointer to a .I struct io_uring_rsrc_update2, which contains an offset on which to start the update, and an array of file descriptors to use for the update stored in .I data. .I tags points to an array of tags. .I nr must contain the number of descriptors in the passed in arrays. See .B IORING_REGISTER_BUFFERS2 for the resource tagging description. Available since 5.13. .TP .B IORING_UNREGISTER_FILES This operation requires no argument, and .I arg must be passed as NULL. All previously registered files associated with the io_uring instance will be unregistered. Available since 5.1. .TP .B IORING_REGISTER_EVENTFD It's possible to use eventfd(2) to get notified of completion events on an io_uring instance. If this is desired, an eventfd file descriptor can be registered through this operation. .I arg must contain a pointer to the eventfd file descriptor, and .I nr_args must be 1.
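Again as a non-authoritative sketch building on the hypothetical sys_io_uring_register() helper from above:
.PP
.in +4n
.EX
#include <unistd.h>
#include <sys/eventfd.h>
#include <linux/io_uring.h>

/* Returns the eventfd on success, -1 on error. */
static int register_ring_eventfd(int ring_fd)
{
        int efd = eventfd(0, EFD_CLOEXEC);

        if (efd < 0)
                return -1;
        /* arg points at the descriptor, nr_args must be 1 */
        if (sys_io_uring_register(ring_fd, IORING_REGISTER_EVENTFD,
                                  &efd, 1) < 0) {
                close(efd);
                return -1;
        }
        return efd;
}
.EE
.in
.PP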
Note that while io_uring generally takes care to avoid spurious events, they can occur. Similarly, batched completions of CQEs may only trigger a single eventfd notification even if multiple CQEs are posted. The application should make no assumptions on number of events being available having a direct correlation to eventfd notifications posted. An eventfd notification must thus only be treated as a hint to check the CQ ring for completions. Available since 5.2. An application can temporarily disable notifications, coming through the registered eventfd, by setting the .B IORING_CQ_EVENTFD_DISABLED bit in the .I flags field of the CQ ring. Available since 5.8. .TP .B IORING_REGISTER_EVENTFD_ASYNC This works just like .B IORING_REGISTER_EVENTFD , except notifications are only posted for events that complete in an async manner. This means that events that complete inline while being submitted do not trigger a notification event. The arguments supplied are the same as for .B IORING_REGISTER_EVENTFD. Available since 5.6. .TP .B IORING_UNREGISTER_EVENTFD Unregister an eventfd file descriptor to stop notifications. Since only one eventfd descriptor is currently supported, this operation takes no argument, and .I arg must be passed as NULL and .I nr_args must be zero. Available since 5.2. .TP .B IORING_REGISTER_PROBE This operation returns a structure, io_uring_probe, which contains information about the opcodes supported by io_uring on the running kernel. .I arg must contain a pointer to a struct io_uring_probe, and .I nr_args must contain the size of the ops array in that probe struct. The ops array is of the type io_uring_probe_op, which holds the value of the opcode and a flags field. If the flags field has .B IO_URING_OP_SUPPORTED set, then this opcode is supported on the running kernel. Available since 5.6. .TP .B IORING_REGISTER_PERSONALITY This operation registers credentials of the running application with io_uring, and returns an id associated with these credentials. Applications wishing to share a ring between separate users/processes can pass in this credential id in the sqe .B personality field. If set, that particular sqe will be issued with these credentials. Must be invoked with .I arg set to NULL and .I nr_args set to zero. Available since 5.6. .TP .B IORING_UNREGISTER_PERSONALITY This operation unregisters a previously registered personality with io_uring. .I nr_args must be set to the id in question, and .I arg must be set to NULL. Available since 5.6. .TP .B IORING_REGISTER_ENABLE_RINGS This operation enables an io_uring ring started in a disabled state .RB (IORING_SETUP_R_DISABLED was specified in the call to .BR io_uring_setup (2)). While the io_uring ring is disabled, submissions are not allowed and registrations are not restricted. After the execution of this operation, the io_uring ring is enabled: submissions and registration are allowed, but they will be validated following the registered restrictions (if any). This operation takes no argument, must be invoked with .I arg set to NULL and .I nr_args set to zero. Available since 5.10. .TP .B IORING_REGISTER_RESTRICTIONS .I arg points to a .I struct io_uring_restriction array of .I nr_args entries. With an entry it is possible to allow an .BR io_uring_register (2) .I opcode, or specify which .I opcode and .I flags of the submission queue entry are allowed, or require certain .I flags to be specified (these flags must be set on each submission queue entry). 
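A hedged sketch of such an allowlist, again through the hypothetical sys_io_uring_register() helper shown earlier:
.PP
.in +4n
.EX
#include <string.h>
#include <linux/io_uring.h>

/* The ring must have been created with IORING_SETUP_R_DISABLED. */
static int restrict_ring(int ring_fd)
{
        struct io_uring_restriction res[3];

        memset(res, 0, sizeof(res));
        /* allow registering buffers... */
        res[0].opcode = IORING_RESTRICTION_REGISTER_OP;
        res[0].register_op = IORING_REGISTER_BUFFERS;
        /* ...allow only read requests... */
        res[1].opcode = IORING_RESTRICTION_SQE_OP;
        res[1].sqe_op = IORING_OP_READ;
        /* ...and require IOSQE_FIXED_FILE on every SQE */
        res[2].opcode = IORING_RESTRICTION_SQE_FLAGS_REQUIRED;
        res[2].sqe_flags = IOSQE_FIXED_FILE;

        return sys_io_uring_register(ring_fd, IORING_REGISTER_RESTRICTIONS,
                                     res, 3);
}
.EE
.in
.PP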
All the restrictions must be submitted with a single .BR io_uring_register (2) call and they are handled as an allowlist (opcodes and flags that are not registered are not allowed). Restrictions can be registered only if the io_uring ring started in a disabled state .RB (IORING_SETUP_R_DISABLED must be specified in the call to .BR io_uring_setup (2)). Available since 5.10. .TP .B IORING_REGISTER_IOWQ_AFF By default, async workers created by io_uring will inherit the CPU mask of their parent. This is usually all the CPUs in the system, unless the parent is being run with a limited set. If this isn't the desired outcome, the application may explicitly tell io_uring what CPUs the async workers may run on. .I arg must point to a .B cpu_set_t mask, and .I nr_args the byte size of that mask. Available since 5.14. .TP .B IORING_UNREGISTER_IOWQ_AFF Undoes a CPU mask previously set with .B IORING_REGISTER_IOWQ_AFF. Must not have .I arg or .I nr_args set. Available since 5.14. .TP .B IORING_REGISTER_IOWQ_MAX_WORKERS By default, io_uring limits the unbounded workers created to the maximum processor count set by .I RLIMIT_NPROC , while the number of bounded workers is a function of the SQ ring size and the number of CPUs in the system. Sometimes this can be excessive (or too little, for bounded), and this command provides a way to change the count per ring (per NUMA node) instead. .I arg must be set to an .I unsigned int pointer to an array of two values, with the values in the array being set to the maximum count of workers per NUMA node. Index 0 holds the bounded worker count, and index 1 holds the unbounded worker count. On successful return, the passed in array will contain the previous maximum values for each type. If the count being passed in is 0, then this command returns the current maximum values and doesn't modify the current setting. .I nr_args must be set to 2, as the command takes two values. Available since 5.15. .TP .B IORING_REGISTER_RING_FDS Whenever .BR io_uring_enter (2) is called to submit requests or wait for completions, the kernel must grab a reference to the file descriptor. If the application using io_uring is threaded, the file table is marked as shared, and the reference grab and put of the file descriptor count is more expensive than it is for a non-threaded application. Similarly to how io_uring allows registration of files, this allows registration of the ring file descriptor itself. This reduces the overhead of the .BR io_uring_enter (2) system call. .I arg must be set to an unsigned int pointer to an array of type .I struct io_uring_rsrc_update of .I nr_args number of entries. The .B data field of this struct must be set to an io_uring file descriptor, and the .B offset field can be either .B -1 or an explicit offset desired for the registered file descriptor value. If .B -1 is used, then upon successful return of this system call, the field will contain the value of the registered file descriptor to be used for future .BR io_uring_enter (2) system calls. On successful completion of this request, the returned descriptors may be used instead of the real file descriptor for .BR io_uring_enter (2), provided that .B IORING_ENTER_REGISTERED_RING is set in the .I flags for the system call. This flag tells the kernel that a registered descriptor is used rather than a real file descriptor. Each thread or process using a ring must register the file descriptor directly by issuing this request. The maximum number of supported registered ring descriptors is currently limited to .B 16.
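As a sketch (assuming the struct io_uring_rsrc_update layout from linux/io_uring.h and the hypothetical wrapper from earlier):
.PP
.in +4n
.EX
#include <linux/io_uring.h>

/* Returns the registered index, or -1 on error. */
static int register_ring_fd(int ring_fd)
{
        struct io_uring_rsrc_update upd = {
                .offset = -1U,  /* let the kernel pick a slot */
                .data   = (__u64) ring_fd,
        };

        if (sys_io_uring_register(ring_fd, IORING_REGISTER_RING_FDS,
                                  &upd, 1) < 0)
                return -1;
        /* upd.offset now holds the index to pass to io_uring_enter(2)
         * together with IORING_ENTER_REGISTERED_RING */
        return (int) upd.offset;
}
.EE
.in
.PP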
Available since 5.18. .TP .B IORING_UNREGISTER_RING_FDS Unregister descriptors previously registered with .B IORING_REGISTER_RING_FDS. .I arg must be set to an unsigned int pointer to an array of type .I struct io_uring_rsrc_register of .I nr_args number of entries. Only the .B offset field should be set in the structure, containing the registered file descriptor offset previously returned from .B IORING_REGISTER_RING_FDS that the application wishes to unregister. Note that this isn't done automatically on ring exit, if the thread or task that previously registered a ring file descriptor isn't exiting. It is recommended to manually unregister any previously registered ring descriptors if the ring is closed and the task persists. This will free up a registration slot, making it available for future use. Available since 5.18. .TP .B IORING_REGISTER_PBUF_RING Registers a shared buffer ring to be used with provided buffers. This is a newer alternative to using .B IORING_OP_PROVIDE_BUFFERS which is more efficient, to be used with request types that support the .B IOSQE_BUFFER_SELECT flag. The .I arg argument must be filled in with the appropriate information. It looks as follows: .PP .in +12n .EX struct io_uring_buf_reg { __u64 ring_addr; __u32 ring_entries; __u16 bgid; __u16 pad; __u64 resv[3]; }; .EE .in .PP .in +8n The .I ring_addr field must contain the address to the memory allocated to fit this ring. The memory must be page aligned and hence allocated appropriately using eg .BR posix_memalign (3) or similar. The size of the ring is the product of .I ring_entries and the size of .IR "struct io_uring_buf" . .I ring_entries is the desired size of the ring, and must be a power-of-2 in size. The maximum size allowed is 2^15 (32768). .I bgid is the buffer group ID associated with this ring. SQEs that select a buffer have a buffer group associated with them in their .I buf_group field, and the associated CQEs will have .B IORING_CQE_F_BUFFER set in their .I flags member, which will also contain the specific ID of the buffer selected. The rest of the fields are reserved and must be cleared to zero. .I nr_args must be set to 1. Also see .BR io_uring_register_buf_ring (3) for more details. Available since 5.19. .TP .B IORING_UNREGISTER_PBUF_RING Unregister a previously registered provided buffer ring. .I arg must be set to the address of a struct io_uring_buf_reg, with just the .I bgid field set to the buffer group ID of the previously registered provided buffer group. .I nr_args must be set to 1. Also see .B IORING_REGISTER_PBUF_RING . Available since 5.19. .TP .B IORING_REGISTER_SYNC_CANCEL Performs a synchronous cancelation request, which works in a similar fashion to .B IORING_OP_ASYNC_CANCEL except it completes inline. This can be useful for scenarios where cancelations should happen synchronously, rather than needing to issue an SQE and wait for completion of that specific CQE. .I arg must be set to a pointer to a struct io_uring_sync_cancel_reg structure, with the details filled in for what request(s) to target for cancelation. See .BR io_uring_register_sync_cancel (3) for details on that. The return values are the same, except they are passed back synchronously rather than through the CQE .I res field. .I nr_args must be set to 1. Available since 6.0. .TP .B IORING_REGISTER_FILE_ALLOC_RANGE sets the allowable range for fixed file index allocations within the kernel. 
When requests that can instantiate a new fixed file are used with .B IORING_FILE_INDEX_ALLOC , the application is asking the kernel to allocate a new fixed file descriptor rather than pass in a specific value for one. By default, the kernel will pick any available fixed file descriptor within the range available. This effectively allows the application to set aside a range just for dynamic allocations, with the remainder being used for specific values. .I nr_args must be set to 1 and .I arg must be set to a pointer to a struct io_uring_file_index_range: .PP .in +12n .EX struct io_uring_file_index_range { __u32 off; __u32 len; __u64 resv; }; .EE .in .PP .in +8n with .I off being set to the starting value for the range, and .I len being set to the number of descriptors. The reserved .I resv field must be cleared to zero. The application must have registered a file table first. Available since 6.0. .SH RETURN VALUE On success, .BR io_uring_register (2) returns either 0 or a positive value, depending on the .I opcode used. On error, a negative error value is returned. The caller should not rely on the .I errno variable. .SH ERRORS .TP .B EACCES The .I opcode field is not allowed due to registered restrictions. .TP .B EBADF One or more fds in the .I fd array are invalid. .TP .B EBADFD .B IORING_REGISTER_ENABLE_RINGS or .B IORING_REGISTER_RESTRICTIONS was specified, but the io_uring ring is not disabled. .TP .B EBUSY .B IORING_REGISTER_BUFFERS or .B IORING_REGISTER_FILES or .B IORING_REGISTER_RESTRICTIONS was specified, but there were already buffers, files, or restrictions registered. .TP .B EEXIST The thread performing the registration is invalid. .TP .B EFAULT buffer is outside of the process' accessible address space, or .I iov_len is greater than 1GiB. .TP .B EINVAL .B IORING_REGISTER_BUFFERS or .B IORING_REGISTER_FILES was specified, but .I nr_args is 0. .TP .B EINVAL .B IORING_REGISTER_BUFFERS was specified, but .I nr_args exceeds .B UIO_MAXIOV .TP .B EINVAL .B IORING_UNREGISTER_BUFFERS or .B IORING_UNREGISTER_FILES was specified, and .I nr_args is non-zero or .I arg is non-NULL. .TP .B EINVAL .B IORING_REGISTER_RESTRICTIONS was specified, but .I nr_args exceeds the maximum allowed number of restrictions or restriction .I opcode is invalid. .TP .B EMFILE .B IORING_REGISTER_FILES was specified and .I nr_args exceeds the maximum allowed number of files in a fixed file set. .TP .B EMFILE .B IORING_REGISTER_FILES was specified and adding .I nr_args file references would exceed the maximum allowed number of files the user is allowed to have according to the .B RLIMIT_NOFILE resource limit and the caller does not have .B CAP_SYS_RESOURCE capability. Note that this is a per user limit, not per process. .TP .B ENOMEM Insufficient kernel resources are available, or the caller had a non-zero .B RLIMIT_MEMLOCK soft resource limit, but tried to lock more memory than the limit permitted. This limit is not enforced if the process is privileged .RB ( CAP_IPC_LOCK ). .TP .B ENXIO .B IORING_UNREGISTER_BUFFERS or .B IORING_UNREGISTER_FILES was specified, but there were no buffers or files registered. .TP .B ENXIO Attempt to register files or buffers on an io_uring instance that is already undergoing file or buffer registration, or is being torn down. .TP .B EOPNOTSUPP User buffers point to file-backed memory. .TP .B EFAULT User buffers point to file-backed memory (newer kernels). 
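As a closing, non-authoritative illustration of the .B IORING_REGISTER_PROBE opcode described above (reusing the hypothetical sys_io_uring_register() helper from the .B IORING_REGISTER_BUFFERS example):
.PP
.in +4n
.EX
#include <stdlib.h>
#include <linux/io_uring.h>

/* Returns 1 if opcode is supported, 0 if not, -1 on error. */
static int opcode_supported(int ring_fd, int opcode)
{
        size_t len = sizeof(struct io_uring_probe) +
                     256 * sizeof(struct io_uring_probe_op);
        struct io_uring_probe *p = calloc(1, len);
        int ret = -1;

        if (!p)
                return -1;
        if (sys_io_uring_register(ring_fd, IORING_REGISTER_PROBE,
                                  p, 256) >= 0)
                ret = opcode <= p->last_op &&
                      (p->ops[opcode].flags & IO_URING_OP_SUPPORTED);
        free(p);
        return ret;
}
.EE
.in
.PP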
liburing-2.6/man/io_uring_register_buf_ring.3000066400000000000000000000070431461424365000214360ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_buf_ring 3 "May 18, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_register_buf_ring \- register buffer ring for provided buffers .SH SYNOPSIS .nf .B #include .PP .BI "int io_uring_register_buf_ring(struct io_uring *" ring ", .BI " struct io_uring_buf_reg *" reg ", .BI " unsigned int " flags ");" .BI " .fi .SH DESCRIPTION .PP The .BR io_uring_register_buf_ring (3) function registers a shared buffer ring to be used with provided buffers. For the request types that support it, provided buffers are given to the ring and one is selected by a request if it has .B IOSQE_BUFFER_SELECT set in the SQE .IR flags , when the request is ready to receive data. This allows both clear ownership of the buffer lifetime, and a way to have more read/receive type of operations in flight than buffers available. The .I reg argument must be filled in with the appropriate information. It looks as follows: .PP .in +4n .EX struct io_uring_buf_reg { __u64 ring_addr; __u32 ring_entries; __u16 bgid; __u16 pad; __u64 resv[3]; }; .EE .in .PP The .I ring_addr field must contain the address to the memory allocated to fit this ring. The memory must be page aligned and hence allocated appropriately using eg .BR posix_memalign (3) or similar. The size of the ring is the product of .I ring_entries and the size of .IR "struct io_uring_buf" . .I ring_entries is the desired size of the ring, and must be a power-of-2 in size. The maximum size allowed is 2^15 (32768). .I bgid is the buffer group ID associated with this ring. SQEs that select a buffer have a buffer group associated with them in their .I buf_group field, and the associated CQEs will have .B IORING_CQE_F_BUFFER set in their .I flags member, which will also contain the specific ID of the buffer selected. The rest of the fields are reserved and must be cleared to zero. The .I flags argument is currently unused and must be set to zero. A shared buffer ring looks as follows: .PP .in +4n .EX struct io_uring_buf_ring { union { struct { __u64 resv1; __u32 resv2; __u16 resv3; __u16 tail; }; struct io_uring_buf bufs[0]; }; }; .EE .in .PP where .I tail is the index at which the application can insert new buffers for consumption by requests, and .I struct io_uring_buf is buffer definition: .PP .in +4n .EX struct io_uring_buf { __u64 addr; __u32 len; __u16 bid; __u16 resv; }; .EE .in .PP where .I addr is the address for the buffer, .I len is the length of the buffer in bytes, and .I bid is the buffer ID that will be returned in the CQE once consumed. Reserved fields must not be touched. Applications must use .BR io_uring_buf_ring_init (3) to initialise the buffer ring before use. Applications may use .BR io_uring_buf_ring_add (3) and .BR io_uring_buf_ring_advance (3) or .BR io_uring_buf_ring_cq_advance (3) to provide buffers, which will set these fields and update the tail. Available since 5.19. .SH RETURN VALUE On success .BR io_uring_register_buf_ring (3) returns 0. On failure it returns .BR -errno . .SH NOTES Unless manual setup is needed, it's recommended to use .BR io_uring_setup_buf_ring (3) as it provides a simpler way to setup a provided buffer ring. 
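As a non-authoritative sketch of that simpler path, assuming a recent liburing with .BR io_uring_setup_buf_ring (3) available:
.PP
.in +4n
.EX
#include <stdlib.h>
#include <liburing.h>

#define NR_BUFS  8
#define BUF_SIZE 4096
#define BGID     0      /* example buffer group ID */

static struct io_uring_buf_ring *setup_group(struct io_uring *ring,
                                             void **bufs)
{
        struct io_uring_buf_ring *br;
        int i, ret;

        br = io_uring_setup_buf_ring(ring, NR_BUFS, BGID, 0, &ret);
        if (!br)
                return NULL;
        for (i = 0; i < NR_BUFS; i++) {
                bufs[i] = malloc(BUF_SIZE);
                io_uring_buf_ring_add(br, bufs[i], BUF_SIZE, i,
                                      io_uring_buf_ring_mask(NR_BUFS), i);
        }
        /* publish the buffers to the kernel */
        io_uring_buf_ring_advance(br, NR_BUFS);
        return br;
}
.EE
.in
.PP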
.SH SEE ALSO .BR io_uring_buf_ring_init (3), .BR io_uring_buf_ring_add (3), .BR io_uring_setup_buf_ring (3), .BR io_uring_buf_ring_advance (3), .BR io_uring_buf_ring_cq_advance (3) liburing-2.6/man/io_uring_register_buffers.3000066400000000000000000000062741461424365000213020ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_buffers 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_register_buffers \- register buffers for fixed buffer operations .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_register_buffers(struct io_uring *" ring "," .BI " const struct iovec *" iovecs "," .BI " unsigned " nr_iovecs ");" .PP .BI "int io_uring_register_buffers_tags(struct io_uring *" ring "," .BI " const struct iovec *" iovecs "," .BI " const __u64 *" tags "," .BI " unsigned " nr ");" .PP .BI "int io_uring_register_buffers_sparse(struct io_uring *" ring "," .BI " unsigned " nr_iovecs ");" .PP .BI "int io_uring_register_buffers_update_tag(struct io_uring *" ring "," .BI " unsigned " off "," .BI " const struct iovec *" iovecs "," .BI " const __u64 *" tags "," .BI " unsigned " nr ");" .fi .SH DESCRIPTION .PP The .BR io_uring_register_buffers (3) function registers .I nr_iovecs number of buffers defined by the array .I iovecs belonging to the .IR ring . The .BR io_uring_register_buffers_tags (3) function behaves the same as the .BR io_uring_register_buffers (3) function but additionally takes a .I tags parameter. See .B IORING_REGISTER_BUFFERS2 for the resource tagging description. The .BR io_uring_register_buffers_sparse (3) function registers .I nr_iovecs empty buffers belonging to the .IR ring . These buffers must be updated before use, using e.g. .BR io_uring_register_buffers_update_tag (3). After the caller has registered the buffers, they can be used with one of the fixed buffers functions. Registering buffers is an optimization that is useful in conjunction with .B O_DIRECT reads and writes, as it maps the specified range into the kernel once when the buffers are registered, rather than doing a map and unmap for every IO performed against that region. Additionally, it also avoids manipulating the page reference counts for each IO. The .BR io_uring_register_buffers_update_tag (3) function updates registered buffers with new ones, either turning a sparse entry into a real one, or replacing an existing entry. The .I off parameter is the offset at which to start the update, and .I nr is the number of buffers defined by the array .I iovecs belonging to the .IR ring . The .I tags parameter points to an array of tags. See .B IORING_REGISTER_BUFFERS2 for the resource tagging description. .SH RETURN VALUE On success .BR io_uring_register_buffers (3), .BR io_uring_register_buffers_tags (3) and .BR io_uring_register_buffers_sparse (3) return 0. .BR io_uring_register_buffers_update_tag (3) returns the number of buffers updated. On failure they return .BR -errno .
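As an illustrative sketch (error handling abbreviated, names hypothetical):
.PP
.in +4n
.EX
#include <liburing.h>

static int setup_fixed_buffers(struct io_uring *ring)
{
        static char b0[4096], b1[4096];
        struct iovec iovs[2] = {
                { .iov_base = b0, .iov_len = sizeof(b0) },
                { .iov_base = b1, .iov_len = sizeof(b1) },
        };

        if (io_uring_queue_init(8, ring, 0) < 0)
                return -1;
        /* buf_index 0 and 1 are then usable with
         * io_uring_prep_read_fixed(3)/io_uring_prep_write_fixed(3) */
        return io_uring_register_buffers(ring, iovs, 2);
}
.EE
.in
.PP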
.SH SEE ALSO .BR io_uring_register (2), .BR io_uring_get_sqe (3), .BR io_uring_unregister_buffers (3), .BR io_uring_register_buf_ring (3), .BR io_uring_prep_read_fixed (3), .BR io_uring_prep_write_fixed (3) liburing-2.6/man/io_uring_register_buffers_sparse.3000077700000000000000000000000001461424365000301622io_uring_register_buffers.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_register_buffers_tags.3000077700000000000000000000000001461424365000276232io_uring_register_buffers.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_register_buffers_update_tag.3000077700000000000000000000000001461424365000310022io_uring_register_buffers.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_register_eventfd.3000066400000000000000000000033331461424365000212740ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_eventfd 3 "April 16, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_register_eventfd \- register an eventfd with a ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_register_eventfd(struct io_uring *" ring "," .BI " int " fd ");" .PP .BI "int io_uring_register_eventfd_async(struct io_uring *" ring "," .BI " int " fd ");" .PP .BI "int io_uring_unregister_eventfd(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP .BR io_uring_register_eventfd (3) registers the eventfd file descriptor .I fd with the ring identified by .IR ring . Whenever completions are posted to the CQ ring, an eventfd notification is generated with the registered eventfd descriptor. If .BR io_uring_register_eventfd_async (3) is used, only events that completed out-of-line will trigger a notification. If notifications are no longer desired, .BR io_uring_unregister_eventfd (3) may be called to remove the eventfd registration. No eventfd argument is needed, as a ring can only have a single eventfd registered. .SH NOTES While io_uring generally takes care to avoid spurious events, they can occur. Similarly, batched completions of CQEs may only trigger a single eventfd notification even if multiple CQEs are posted. The application should not assume that the number of available completions correlates directly with the number of eventfd notifications posted. An eventfd notification must thus only be treated as a hint to check the CQ ring for completions. .SH RETURN VALUE Returns 0 on success, or .BR -errno on error. .SH SEE ALSO .BR eventfd (2) liburing-2.6/man/io_uring_register_eventfd_async.3000077700000000000000000000000001461424365000300002io_uring_register_eventfd.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_register_file_alloc_range.3000066400000000000000000000033461461424365000231120ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_file_alloc_range 3 "Oct 21, 2022" "liburing-2.3" "liburing Manual" .SH NAME io_uring_register_file_alloc_range \- set range for fixed file allocations .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_register_file_alloc_range(struct io_uring *" ring ", .BI " unsigned " off "," .BI " unsigned " len ");" .fi .SH DESCRIPTION .PP The .BR io_uring_register_file_alloc_range (3) function sets the allowable range for fixed file index allocations within the kernel. When requests that can instantiate a new fixed file are used with .B IORING_FILE_INDEX_ALLOC , the application is asking the kernel to allocate a new fixed file descriptor rather than pass in a specific value for one.
By default, the kernel will pick any available fixed file descriptor within the range available. By calling this function with .I off set to the starting offset and .I len set to the number of descriptors, the application can limit the allocated descriptors to that particular range. This effectively allows the application to set aside a range just for dynamic allocations, with the remainder being used for specific values. The application must have registered a fixed file table upfront, e.g. through .BR io_uring_register_files (3) or .BR io_uring_register_files_sparse (3) . Available since 6.0. .SH RETURN VALUE On success .BR io_uring_register_file_alloc_range (3) returns 0. On failure it returns .BR -errno . .SH SEE ALSO .BR io_uring_register_files (3), .BR io_uring_prep_accept_direct (3), .BR io_uring_prep_openat_direct (3), .BR io_uring_prep_socket_direct (3) liburing-2.6/man/io_uring_register_files.3000066400000000000000000000067271461424365000207530ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_files 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_register_files \- register file descriptors .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_register_files(struct io_uring *" ring "," .BI " const int *" files "," .BI " unsigned " nr_files ");" .PP .BI "int io_uring_register_files_tags(struct io_uring *" ring "," .BI " const int *" files "," .BI " const __u64 *" tags "," .BI " unsigned " nr ");" .PP .BI "int io_uring_register_files_sparse(struct io_uring *" ring "," .BI " unsigned " nr_files ");" .PP .BI "int io_uring_register_files_update(struct io_uring *" ring "," .BI " unsigned " off "," .BI " const int *" files "," .BI " unsigned " nr_files ");" .PP .BI "int io_uring_register_files_update_tag(struct io_uring *" ring "," .BI " unsigned " off "," .BI " const int *" files "," .BI " const __u64 *" tags "," .BI " unsigned " nr_files ");" .fi .SH DESCRIPTION .PP The .BR io_uring_register_files (3) function registers .I nr_files number of file descriptors defined by the array .I files belonging to the .I ring for subsequent operations. The .BR io_uring_register_files_tags (3) function behaves the same as the .BR io_uring_register_files (3) function but additionally takes a .I tags parameter. See .B IORING_REGISTER_BUFFERS2 for the resource tagging description. The .BR io_uring_register_files_sparse (3) function registers an empty file table of .I nr_files number of file descriptors. These files must be updated before use, using e.g. .BR io_uring_register_files_update_tag (3). The sparse variant is available in kernels 5.19 and later. Registering a file table is a prerequisite for using any request that uses direct descriptors. Registered files have less overhead per operation than normal files. This is due to the kernel grabbing a reference count on a file when an operation begins, and dropping it when it's done. When the process file table is shared, for example if the process has ever created any threads, then this cost goes up even more. Using registered files reduces the overhead of file reference management across requests that operate on a file. The .BR io_uring_register_files_update (3) function updates existing registered files. The .I off parameter is the offset at which to start the update, and .I nr_files is the number of files defined by the array .I files belonging to the .IR ring .
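For instance, a hedged sketch that replaces slot 1 of a previously registered table with a new descriptor (the function name is made up for illustration):
.PP
.in +4n
.EX
#include <liburing.h>

static int swap_slot_one(struct io_uring *ring, int new_fd)
{
        int fds[1] = { new_fd };

        /* offset 1, one descriptor; returns 1 on success */
        return io_uring_register_files_update(ring, 1, fds, 1);
}
.EE
.in
.PP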
The .BR io_uring_register_files_update_tag (3) function behaves the same as the .BR io_uring_register_files_update (3) function but additionally takes a .I tags parameter. See .B IORING_REGISTER_BUFFERS2 for the resource tagging description. .SH RETURN VALUE On success .BR io_uring_register_files (3), .BR io_uring_register_files_tags (3) and .BR io_uring_register_files_sparse (3) return 0. .BR io_uring_register_files_update (3) and .BR io_uring_register_files_update_tag (3) return the number of files updated. On failure they return .BR -errno . .SH SEE ALSO .BR io_uring_register (2), .BR io_uring_get_sqe (3), .BR io_uring_unregister_files (3) liburing-2.6/man/io_uring_register_files_sparse.3000077700000000000000000000000001461424365000272762io_uring_register_files.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_register_files_tags.3000077700000000000000000000000001461424365000267372io_uring_register_files.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_register_files_update.3000077700000000000000000000000001461424365000272632io_uring_register_files.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_register_files_update_tag.3000077700000000000000000000000001461424365000301162io_uring_register_files.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_register_iowq_aff.3000066400000000000000000000032241461424365000214330ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_iowq_aff 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_register_iowq_aff \- register async worker CPU affinities .SH SYNOPSIS .nf .B #include <sched.h> .B #include <liburing.h> .PP .BI "int io_uring_register_iowq_aff(struct io_uring *" ring "," .BI " size_t " cpusz "," .BI " const cpu_set_t *" mask "); .PP .BI "int io_uring_unregister_iowq_aff(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_register_iowq_aff (3) function registers a set of CPU affinities to be used by the io_uring async workers. By default, io_uring async workers are allowed to run on any CPU in the system. If this function is called with .I ring set to the ring in question and .I mask set to a pointer to a .B cpu_set_t value and .I cpusz set to the size of the CPU set, then async workers will only be allowed to run on the CPUs specified in the mask. Existing workers may need to hit a schedule point before they are migrated. For unregistration, .BR io_uring_unregister_iowq_aff (3) may be called to restore CPU affinities to the default. .SH RETURN VALUE Returns .B 0 on success, or any of the following values in case of error. .TP .B -EFAULT The kernel was unable to copy the memory pointed to by .I mask as it was invalid. .TP .B -ENOMEM The kernel was unable to allocate memory for the new CPU mask. .TP .B -EINVAL .I cpusz or .I mask was NULL/0, or any other value specified was invalid.
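For illustration, confining the workers to CPUs 0 and 1 might look like this sketch:
.PP
.in +4n
.EX
#define _GNU_SOURCE
#include <sched.h>
#include <liburing.h>

static int pin_workers(struct io_uring *ring)
{
        cpu_set_t mask;

        CPU_ZERO(&mask);
        CPU_SET(0, &mask);
        CPU_SET(1, &mask);
        return io_uring_register_iowq_aff(ring, sizeof(mask), &mask);
}
.EE
.in
.PP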
.SH SEE ALSO .BR io_uring_queue_init (3), .BR io_uring_register (2) liburing-2.6/man/io_uring_register_iowq_max_workers.3000066400000000000000000000042531461424365000232430ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_iowq_max_workers 3 "March 13, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_register_iowq_max_workers \- modify the maximum allowed async workers .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_register_iowq_max_workers(struct io_uring *" ring "," .BI " unsigned int *" values ");" .fi .SH DESCRIPTION .PP io_uring async workers are split into two types: .TP .B Bounded These workers have a bounded execution time. Examples of that are filesystem reads, which normally complete in a relatively short amount of time. In case of disk failures, they are still bounded by a timeout operation that will abort them if exceeded. .TP .B Unbounded Work items here may take an indefinite amount of time to complete. Examples include doing IO to sockets, pipes, or any other non-regular type of file. .PP By default, the number of bounded IO workers is limited to how many SQ entries the ring was set up with, or 4 times the number of online CPUs in the system, whichever is smaller. Unbounded workers are only limited by the process task limit, as indicated by the rlimit .B RLIMIT_NPROC limit. This can be modified by calling .B io_uring_register_iowq_max_workers with .I ring set to the ring in question, and .I values pointing to an array of two values. The first element should contain the number of desired bounded workers, and the second element should contain the number of desired unbounded workers. These are both maximum values; io_uring will not maintain a high count of idle workers, as they are reaped when they are no longer necessary. If called with both values set to 0, the existing values are returned. .SH RETURN VALUE Returns .B 0 on success, with .I values containing the previous values for the settings. On error, any of the following may be returned. .TP .B -EFAULT The kernel was unable to copy the memory pointed to by .I values as it was invalid. .TP .B -EINVAL .I values was .B NULL or the new values exceeded the maximum allowed value. .SH SEE ALSO .BR io_uring_queue_init (3), .BR io_uring_register (2) liburing-2.6/man/io_uring_register_napi.3000066400000000000000000000021471461424365000205720ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_napi 3 "November 16, 2022" "liburing-2.4" "liburing Manual" .SH NAME io_uring_register_napi \- register NAPI busy poll settings .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_register_napi(struct io_uring *" ring "," .BI " struct io_uring_napi *" napi) .PP .fi .SH DESCRIPTION .PP The .BR io_uring_register_napi (3) function registers the NAPI settings for subsequent operations. The NAPI settings are specified in the structure that is passed in the .I napi parameter. The structure consists of the napi timeout .I busy_poll_to (napi busy poll timeout in us) and .IR prefer_busy_poll . Registering NAPI settings sets the mode when calling the function napi_busy_loop and corresponds to the .B SO_PREFER_BUSY_POLL socket option. NAPI busy poll can reduce the network roundtrip time. .SH RETURN VALUE On success .BR io_uring_register_napi (3) returns 0. On failure it returns .BR -errno . It also updates the napi structure with the current values.
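As a non-authoritative sketch, assuming the struct io_uring_napi definition from linux/io_uring.h:
.PP
.in +4n
.EX
#include <string.h>
#include <liburing.h>

/* Enable NAPI busy polling with a 100us timeout. */
static int enable_napi(struct io_uring *ring)
{
        struct io_uring_napi n;

        memset(&n, 0, sizeof(n));
        n.busy_poll_to = 100;    /* microseconds */
        n.prefer_busy_poll = 1;
        return io_uring_register_napi(ring, &n);
}
.EE
.in
.PP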
liburing-2.6/man/io_uring_register_ring_fd.3000066400000000000000000000033101461424365000212460ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_ring_fd 3 "March 11, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_register_ring_fd \- register a ring file descriptor .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_register_ring_fd(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP .BR io_uring_register_ring_fd (3) registers the file descriptor of the ring. Whenever .BR io_uring_enter (2) is called to submit requests or wait for completions, the kernel must grab a reference to the file descriptor. If the application using io_uring is threaded, the file table is marked as shared, and the reference grab and put of the file descriptor count is more expensive than it is for a non-threaded application. Similarly to how io_uring allows registration of files, this allows registration of the ring file descriptor itself. This reduces the overhead of the .BR io_uring_enter (2) system call. If an application using liburing is threaded, then it should call this function to register the ring descriptor when a ring is set up. See NOTES for restrictions when a ring is shared. .SH NOTES When the ring descriptor is registered, it is stored internally in the .I struct io_uring structure. For applications that share a ring between threads, for example having one thread do submits and another reap events, then this optimization cannot be used as each thread may have a different index for the registered ring fd. .SH RETURN VALUE Returns 1 on success, indicating that one file descriptor was registered, or .BR -errno on error. .SH SEE ALSO .BR io_uring_unregister_ring_fd (3), .BR io_uring_register_files (3) liburing-2.6/man/io_uring_register_sync_cancel.3000066400000000000000000000033321461424365000221210ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_register_sync_cancel 3 "September 21, 2022" "liburing-2.3" "liburing Manual" .SH NAME io_uring_register_sync_cancel \- issue a synchronous cancelation request .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_register_sync_cancel(struct io_uring *" ring ", .BI " struct io_uring_sync_cancel_reg *" reg "); .PP .fi .SH DESCRIPTION .PP The .BR io_uring_register_sync_cancel (3) function performs a synchronous cancelation request based on the parameters specified in .I reg . The .I reg argument must be filled in with the appropriate information for the cancelation request. It looks as follows: .PP .in +4n .EX struct io_uring_sync_cancel_reg { __u64 addr; __s32 fd; __u32 flags; struct __kernel_timespec timeout; __u64 pad[4]; }; .EE .in .PP The arguments largely mirror what the async prep functions support; see .BR io_uring_prep_cancel (3) for details. Similarly, the return value is the same. The exception is the .I timeout argument, which can be used to limit the time that the kernel will wait for cancelations to be successful. If the .I tv_sec and .I tv_nsec values are set to anything but .B -1UL , then they indicate a relative timeout by which cancelations should have completed. The .I pad values must be zero filled. .SH RETURN VALUE See .BR io_uring_prep_cancel (3) for details on the return value. If .I timeout is set to indicate a timeout, then .B -ETIME will be returned if exceeded.
If an unknown value is set in the request, or if the pad values are not cleared to zero, then .I -EINVAL is returned. .SH SEE ALSO .BR io_uring_prep_cancel (3) liburing-2.6/man/io_uring_setup.2000066400000000000000000000567041461424365000171060ustar00rootroot00000000000000.\" Copyright (C) 2019 Jens Axboe .\" Copyright (C) 2019 Jon Corbet .\" Copyright (C) 2019 Red Hat, Inc. .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_setup 2 2019-01-29 "Linux" "Linux Programmer's Manual" .SH NAME io_uring_setup \- setup a context for performing asynchronous I/O .SH SYNOPSIS .nf .BR "#include " .PP .BI "int io_uring_setup(u32 " entries ", struct io_uring_params *" p ); .fi .PP .SH DESCRIPTION .PP The .BR io_uring_setup (2) system call sets up a submission queue (SQ) and completion queue (CQ) with at least .I entries entries, and returns a file descriptor which can be used to perform subsequent operations on the io_uring instance. The submission and completion queues are shared between userspace and the kernel, which eliminates the need to copy data when initiating and completing I/O. .I params is used by the application to pass options to the kernel, and by the kernel to convey information about the ring buffers. .PP .in +4n .EX struct io_uring_params { __u32 sq_entries; __u32 cq_entries; __u32 flags; __u32 sq_thread_cpu; __u32 sq_thread_idle; __u32 features; __u32 wq_fd; __u32 resv[3]; struct io_sqring_offsets sq_off; struct io_cqring_offsets cq_off; }; .EE .in .PP The .IR flags , .IR sq_thread_cpu , and .I sq_thread_idle fields are used to configure the io_uring instance. .I flags is a bit mask of 0 or more of the following values ORed together: .TP .B IORING_SETUP_IOPOLL Perform busy-waiting for an I/O completion, as opposed to getting notifications via an asynchronous IRQ (Interrupt Request). The file system (if any) and block device must support polling in order for this to work. Busy-waiting provides lower latency, but may consume more CPU resources than interrupt driven I/O. Currently, this feature is usable only on a file descriptor opened using the .B O_DIRECT flag. When a read or write is submitted to a polled context, the application must poll for completions on the CQ ring by calling .BR io_uring_enter (2). It is illegal to mix and match polled and non-polled I/O on an io_uring instance. This is only applicable for storage devices for now, and the storage device must be configured for polling. How to do that depends on the device type in question. For NVMe devices, the nvme driver must be loaded with the .I poll_queues parameter set to the desired number of polling queues. The polling queues will be shared appropriately between the CPUs in the system, if the number is less than the number of online CPU threads. .TP .B IORING_SETUP_SQPOLL When this flag is specified, a kernel thread is created to perform submission queue polling. An io_uring instance configured in this way enables an application to issue I/O without ever context switching into the kernel. By using the submission queue to fill in new submission queue entries and watching for completions on the completion queue, the application can submit and reap I/Os without doing a single system call. If the kernel thread is idle for more than .I sq_thread_idle milliseconds, it will set the .B IORING_SQ_NEED_WAKEUP bit in the .I flags field of the .IR "struct io_sq_ring" . When this happens, the application must call .BR io_uring_enter (2) to wake the kernel thread. 
If I/O is kept busy, the kernel thread will never sleep. An application making use of this feature will need to guard the .BR io_uring_enter (2) call with the following code sequence: .in +4n .EX /* * Ensure that the wakeup flag is read after the tail pointer * has been written. It's important to use memory load acquire * semantics for the flags read, as otherwise the application * and the kernel might not agree on the consistency of the * wakeup flag. */ unsigned flags = atomic_load_relaxed(sq_ring->flags); if (flags & IORING_SQ_NEED_WAKEUP) io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP); .EE .in where .I sq_ring is a submission queue ring setup using the .I struct io_sqring_offsets described below. .TP .BR Note that, when using a ring setup with .B IORING_SETUP_SQPOLL, you never directly call the .BR io_uring_enter (2) system call. That is usually taken care of by liburing's .BR io_uring_submit (3) function. It automatically determines if you are using polling mode or not and deals with when your program needs to call .BR io_uring_enter (2) without you having to bother about it. .TP .BR Before version 5.11 of the Linux kernel, to successfully use this feature, the application must register a set of files to be used for IO through .BR io_uring_register (2) using the .B IORING_REGISTER_FILES opcode. Failure to do so will result in submitted IO being errored with .B EBADF. The presence of this feature can be detected by the .B IORING_FEAT_SQPOLL_NONFIXED feature flag. In version 5.11 and later, it is no longer necessary to register files to use this feature. 5.11 also allows using this as non-root, if the user has the .B CAP_SYS_NICE capability. In 5.13 this requirement was also relaxed, and no special privileges are needed for SQPOLL in newer kernels. Certain stable kernels older than 5.13 may also support unprivileged SQPOLL. .TP .B IORING_SETUP_SQ_AFF If this flag is specified, then the poll thread will be bound to the cpu set in the .I sq_thread_cpu field of the .IR "struct io_uring_params" . This flag is only meaningful when .B IORING_SETUP_SQPOLL is specified. When cgroup setting .I cpuset.cpus changes (typically in container environment), the bounded cpu set may be changed as well. .TP .B IORING_SETUP_CQSIZE Create the completion queue with .IR "struct io_uring_params.cq_entries" entries. The value must be greater than .IR entries , and may be rounded up to the next power-of-two. .TP .B IORING_SETUP_CLAMP If this flag is specified, and if .IR entries exceeds .BR IORING_MAX_ENTRIES , then .IR entries will be clamped at .BR IORING_MAX_ENTRIES . If the flag .B IORING_SETUP_CQSIZE is set, and if the value of .IR "struct io_uring_params.cq_entries" exceeds .BR IORING_MAX_CQ_ENTRIES , then it will be clamped at .BR IORING_MAX_CQ_ENTRIES . .TP .B IORING_SETUP_ATTACH_WQ This flag should be set in conjunction with .IR "struct io_uring_params.wq_fd" being set to an existing io_uring ring file descriptor. When set, the io_uring instance being created will share the asynchronous worker thread backend of the specified io_uring ring, rather than create a new separate thread pool. .TP .B IORING_SETUP_R_DISABLED If this flag is specified, the io_uring ring starts in a disabled state. In this state, restrictions can be registered, but submissions are not allowed. See .BR io_uring_register (2) for details on how to enable the ring. Available since 5.10. .TP .B IORING_SETUP_SUBMIT_ALL Normally io_uring stops submitting a batch of requests, if one of these requests results in an error. 
This can cause submission of less than what is expected, if a request ends in error while being submitted. If the ring is created with this flag, .BR io_uring_enter (2) will continue submitting requests even if it encounters an error submitting a request. CQEs are still posted for errored request regardless of whether or not this flag is set at ring creation time, the only difference is if the submit sequence is halted or continued when an error is observed. Available since 5.18. .TP .B IORING_SETUP_COOP_TASKRUN By default, io_uring will interrupt a task running in userspace when a completion event comes in. This is to ensure that completions run in a timely manner. For a lot of use cases, this is overkill and can cause reduced performance from both the inter-processor interrupt used to do this, the kernel/user transition, the needless interruption of the tasks userspace activities, and reduced batching if completions come in at a rapid rate. Most applications don't need the forceful interruption, as the events are processed at any kernel/user transition. The exception are setups where the application uses multiple threads operating on the same ring, where the application waiting on completions isn't the one that submitted them. For most other use cases, setting this flag will improve performance. Available since 5.19. .TP .B IORING_SETUP_TASKRUN_FLAG Used in conjunction with .B IORING_SETUP_COOP_TASKRUN, this provides a flag, .B IORING_SQ_TASKRUN, which is set in the SQ ring .I flags whenever completions are pending that should be processed. liburing will check for this flag even when doing .BR io_uring_peek_cqe (3) and enter the kernel to process them, and applications can do the same. This makes .B IORING_SETUP_TASKRUN_FLAG safe to use even when applications rely on a peek style operation on the CQ ring to see if anything might be pending to reap. Available since 5.19. .TP .B IORING_SETUP_SQE128 If set, io_uring will use 128-byte SQEs rather than the normal 64-byte sized variant. This is a requirement for using certain request types, as of 5.19 only the .B IORING_OP_URING_CMD passthrough command for NVMe passthrough needs this. Available since 5.19. .TP .B IORING_SETUP_CQE32 If set, io_uring will use 32-byte CQEs rather than the normal 16-byte sized variant. This is a requirement for using certain request types, as of 5.19 only the .B IORING_OP_URING_CMD passthrough command for NVMe passthrough needs this. Available since 5.19. .TP .B IORING_SETUP_SINGLE_ISSUER A hint to the kernel that only a single task (or thread) will submit requests, which is used for internal optimisations. The submission task is either the task that created the ring, or if .B IORING_SETUP_R_DISABLED is specified then it is the task that enables the ring through .BR io_uring_register (2) . The kernel enforces this rule, failing requests with .B -EEXIST if the restriction is violated. Note that when .B IORING_SETUP_SQPOLL is set it is considered that the polling task is doing all submissions on behalf of the userspace and so it always complies with the rule disregarding how many userspace tasks do .BR io_uring_enter(2). Available since 6.0. .TP .B IORING_SETUP_DEFER_TASKRUN By default, io_uring will process all outstanding work at the end of any system call or thread interrupt. This can delay the application from making other progress. Setting this flag will hint to io_uring that it should defer work until an .BR io_uring_enter(2) call with the .B IORING_ENTER_GETEVENTS flag set. 
This allows the application to request work to run just before it wants to process completions. This flag requires the .BR IORING_SETUP_SINGLE_ISSUER flag to be set, and also enforces that the call to .BR io_uring_enter(2) is called from the same thread that submitted requests. Note that if this flag is set then it is the application's responsibility to periodically trigger work (for example via any of the CQE waiting functions) or else completions may not be delivered. Available since 6.1. .TP .B IORING_SETUP_NO_MMAP By default, io_uring allocates kernel memory that callers must subsequently .BR mmap (2). If this flag is set, io_uring instead uses caller-allocated buffers; .I p->cq_off.user_addr must point to the memory for the sq/cq rings, and .I p->sq_off.user_addr must point to the memory for the sqes. Each allocation must be contiguous memory. Typically, callers should allocate this memory by using .BR mmap (2) to allocate a huge page. If this flag is set, a subsequent attempt to .BR mmap (2) the io_uring file descriptor will fail. Available since 6.5. .TP .B IORING_SETUP_REGISTERED_FD_ONLY If this flag is set, io_uring will register the ring file descriptor, and return the registered descriptor index, without ever allocating an unregistered file descriptor. The caller will need to use .B IORING_REGISTER_USE_REGISTERED_RING when calling .BR io_uring_register (2). This flag only makes sense when used alongside with .B IORING_SETUP_NO_MMAP, which also needs to be set. Available since 6.5. .TP .B IORING_SETUP_NO_SQARRAY If this flag is set, entries in the submission queue will be submitted in order, wrapping around to the first entry after reaching the end of the queue. In other words, there will be no more indirection via the array of submission entries, and the queue will be indexed directly by the submission queue tail and the range of indexed represented by it modulo queue size. Subsequently, the user should not map the array of submission queue entries, and the corresponding offset in .I struct io_sqring_offsets will be set to zero. Available since 6.6. .PP If no flags are specified, the io_uring instance is setup for interrupt driven I/O. I/O may be submitted using .BR io_uring_enter (2) and can be reaped by polling the completion queue. The .I resv array must be initialized to zero. .I features is filled in by the kernel, which specifies various features supported by current kernel version. .TP .B IORING_FEAT_SINGLE_MMAP If this flag is set, the two SQ and CQ rings can be mapped with a single .I mmap(2) call. The SQEs must still be allocated separately. This brings the necessary .I mmap(2) calls down from three to two. Available since kernel 5.4. .TP .B IORING_FEAT_NODROP If this flag is set, io_uring supports almost never dropping completion events. A dropped event can only occur if the kernel runs out of memory, in which case you have worse problems than a lost event. Your application and others will likely get OOM killed anyway. If a completion event occurs and the CQ ring is full, the kernel stores the event internally until such a time that the CQ ring has room for more entries. In earlier kernels, if this overflow condition is entered, attempting to submit more IO would fail with the .B -EBUSY error value, if it can't flush the overflown events to the CQ ring. If this happens, the application must reap events from the CQ ring and attempt the submit again. 
If the kernel has no free memory to store the event internally, it will be visible as an increase in the overflow value on the CQ ring. Available since kernel 5.5. Additionally, .BR io_uring_enter (2) will return .B -EBADR the next time it would otherwise sleep waiting for completions (since kernel 5.19). .TP .B IORING_FEAT_SUBMIT_STABLE If this flag is set, applications can be certain that any data for async offload has been consumed when the kernel has consumed the SQE. Available since kernel 5.5. .TP .B IORING_FEAT_RW_CUR_POS If this flag is set, applications can specify .I offset == .B -1 with .B IORING_OP_{READV,WRITEV} , .B IORING_OP_{READ,WRITE}_FIXED , and .B IORING_OP_{READ,WRITE} to mean current file position, which behaves like .I preadv2(2) and .I pwritev2(2) with .I offset == .B -1. It'll use (and update) the current file position. This obviously comes with the caveat that if the application has multiple reads or writes in flight, then the end result will not be as expected. This is similar to threads sharing a file descriptor and doing IO using the current file position. Available since kernel 5.6. .TP .B IORING_FEAT_CUR_PERSONALITY If this flag is set, then io_uring guarantees that both sync and async execution of a request assumes the credentials of the task that called .I io_uring_enter(2) to queue the requests. If this flag isn't set, then requests are issued with the credentials of the task that originally registered the io_uring. If only one task is using a ring, then this flag doesn't matter as the credentials will always be the same. Note that this is the default behavior; tasks can still register different personalities through .I io_uring_register(2) with .B IORING_REGISTER_PERSONALITY and specify the personality to use in the sqe. Available since kernel 5.6. .TP .B IORING_FEAT_FAST_POLL If this flag is set, then io_uring supports using an internal poll mechanism to drive data/space readiness. This means that requests that cannot read or write data to a file no longer need to be punted to an async thread for handling; instead they will begin operation when the file is ready. This is similar to doing poll + read/write in userspace, but eliminates the need to do so. If this flag is set, requests waiting on space/data consume far fewer resources doing so, as they are not blocking a thread. Available since kernel 5.7. .TP .B IORING_FEAT_POLL_32BITS If this flag is set, the .B IORING_OP_POLL_ADD command accepts the full 32-bit range of epoll based flags. Most notably .B EPOLLEXCLUSIVE , which allows exclusive (waking single waiters) behavior. Available since kernel 5.9. .TP .B IORING_FEAT_SQPOLL_NONFIXED If this flag is set, the .B IORING_SETUP_SQPOLL feature no longer requires the use of fixed files. Any normal file descriptor can be used for IO commands without needing registration. Available since kernel 5.11. .TP .B IORING_FEAT_ENTER_EXT_ARG If this flag is set, then the .BR io_uring_enter (2) system call supports passing in an extended argument instead of just the .IR "sigset_t" of earlier kernels. This extended argument is of type .IR "struct io_uring_getevents_arg" and allows the caller to pass in both a .IR "sigset_t" and a timeout argument for waiting on events. The struct layout is as follows: .TP .in +8n .EX struct io_uring_getevents_arg { __u64 sigmask; __u32 sigmask_sz; __u32 pad; __u64 ts; }; .EE and a pointer to this struct must be passed in if .B IORING_ENTER_EXT_ARG is set in the flags for the enter system call. Available since kernel 5.11.
.TP .B IORING_FEAT_NATIVE_WORKERS If this flag is set, io_uring is using native workers for its async helpers. Previous kernels used kernel threads that assumed the identity of the original io_uring owning task, but later kernels will actively create what looks more like regular process threads instead. Available since kernel 5.12. .TP .B IORING_FEAT_RSRC_TAGS If this flag is set, then io_uring supports a variety of features related to fixed files and buffers. In particular, it indicates that registered buffers can be updated in-place, whereas before the full set would have to be unregistered first. Available since kernel 5.13. .TP .B IORING_FEAT_CQE_SKIP If this flag is set, then io_uring supports setting .B IOSQE_CQE_SKIP_SUCCESS in the submitted SQE, indicating that no CQE should be generated for this SQE if it executes normally. If an error happens processing the SQE, a CQE with the appropriate error value will still be generated. Available since kernel 5.17. .TP .B IORING_FEAT_LINKED_FILE If this flag is set, then io_uring supports sane assignment of files for SQEs that have dependencies. For example, if a chain of SQEs are submitted with .B IOSQE_IO_LINK, then kernels without this flag will prepare the file for each link upfront. If a previous link opens a file with a known index, e.g. if direct descriptors are used with open or accept, then file assignment needs to happen post execution of that SQE. If this flag is set, then the kernel will defer file assignment until execution of a given request is started. Available since kernel 5.17. .TP .B IORING_FEAT_REG_REG_RING If this flag is set, then io_uring supports calling .BR io_uring_register (2) using a registered ring fd, via .BR IORING_REGISTER_USE_REGISTERED_RING . Available since kernel 6.3. .PP The rest of the fields in the .I struct io_uring_params are filled in by the kernel, and provide the information necessary to memory map the submission queue, completion queue, and the array of submission queue entries. .I sq_entries specifies the number of submission queue entries allocated. .I sq_off describes the offsets of various ring buffer fields: .PP .in +4n .EX struct io_sqring_offsets { __u32 head; __u32 tail; __u32 ring_mask; __u32 ring_entries; __u32 flags; __u32 dropped; __u32 array; __u32 resv1; __u64 user_addr; }; .EE .in .PP Taken together, .I sq_entries and .I sq_off provide all of the information necessary for accessing the submission queue ring buffer and the submission queue entry array. The submission queue can be mapped with a call like: .PP .in +4n .EX ptr = mmap(0, sq_off.array + sq_entries * sizeof(__u32), PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, ring_fd, IORING_OFF_SQ_RING); .EE .in .PP where .I sq_off is the .I io_sqring_offsets structure, and .I ring_fd is the file descriptor returned from .BR io_uring_setup (2). The addition of .I sq_off.array to the length of the region accounts for the fact that the ring is located at the end of the data structure. As an example, the ring buffer head pointer can be accessed by adding .I sq_off.head to the address returned from .BR mmap (2): .PP .in +4n .EX head = ptr + sq_off.head; .EE .in The .I flags field is used by the kernel to communicate state information to the application. Currently, it is used to inform the application when a call to .BR io_uring_enter (2) is necessary. See the documentation for the .B IORING_SETUP_SQPOLL flag above. The .I dropped member is incremented for each invalid submission queue entry encountered in the ring buffer.
The head and tail track the ring buffer state. The tail is incremented by the application when submitting new I/O, and the head is incremented by the kernel when the I/O has been successfully submitted. Determining the index of the head or tail into the ring is accomplished by applying a mask: .PP .in +4n .EX index = tail & ring_mask; .EE .in .PP The array of submission queue entries is mapped with: .PP .in +4n .EX sqentries = mmap(0, sq_entries * sizeof(struct io_uring_sqe), PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, ring_fd, IORING_OFF_SQES); .EE .in .PP The completion queue is described by .I cq_entries and .I cq_off shown here: .PP .in +4n .EX struct io_cqring_offsets { __u32 head; __u32 tail; __u32 ring_mask; __u32 ring_entries; __u32 overflow; __u32 cqes; __u32 flags; __u32 resv1; __u64 user_addr; }; .EE .in .PP The completion queue is simpler, since the entries are not separated from the queue itself, and can be mapped with: .PP .in +4n .EX ptr = mmap(0, cq_off.cqes + cq_entries * sizeof(struct io_uring_cqe), PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, ring_fd, IORING_OFF_CQ_RING); .EE .in .PP Closing the file descriptor returned by .BR io_uring_setup (2) will free all resources associated with the io_uring context. Note that this may happen asynchronously within the kernel, so it is not guaranteed that resources are freed immediately. .PP .SH RETURN VALUE .BR io_uring_setup (2) returns a new file descriptor on success. The application may then provide the file descriptor in a subsequent .BR mmap (2) call to map the submission and completion queues, or to the .BR io_uring_register (2) or .BR io_uring_enter (2) system calls. On error, a negative error code is returned. The caller should not rely on the .I errno variable. .PP .SH ERRORS .TP .B EFAULT .I params is outside your accessible address space. .TP .B EINVAL The .I resv array contains non-zero data, .I p.flags contains an unsupported flag, .I entries is out of bounds, .B IORING_SETUP_SQ_AFF was specified, but .B IORING_SETUP_SQPOLL was not, .B IORING_SETUP_CQSIZE was specified, but .I io_uring_params.cq_entries was invalid, or .B IORING_SETUP_REGISTERED_FD_ONLY was specified, but .B IORING_SETUP_NO_MMAP was not. .TP .B EMFILE The per-process limit on the number of open file descriptors has been reached (see the description of .B RLIMIT_NOFILE in .BR getrlimit (2)). .TP .B ENFILE The system-wide limit on the total number of open files has been reached. .TP .B ENOMEM Insufficient kernel resources are available. .TP .B EPERM .B IORING_SETUP_SQPOLL was specified, but the effective user ID of the caller did not have sufficient privileges. .TP .B EPERM .I /proc/sys/kernel/io_uring_disabled has the value 2, or it has the value 1 and the calling process does not hold the .B CAP_SYS_ADMIN capability or is not a member of .I /proc/sys/kernel/io_uring_group.
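.SH EXAMPLE
.PP
As a minimal sketch of the setup flow described above (error handling omitted; real applications would normally use the liburing helpers instead of the raw system call, which has no glibc wrapper):
.PP
.in +4n
.EX
struct io_uring_params p = { };
int ring_fd;
void *sq_ptr;
struct io_uring_sqe *sqes;

ring_fd = syscall(__NR_io_uring_setup, 8, &p);

/* map the SQ ring, using the offsets the kernel filled in */
sq_ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
	      PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE,
	      ring_fd, IORING_OFF_SQ_RING);

/* map the submission queue entries separately */
sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
	    PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE,
	    ring_fd, IORING_OFF_SQES);
.EE
.in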
.SH SEE ALSO .BR io_uring_register (2), .BR io_uring_enter (2) liburing-2.6/man/io_uring_setup_buf_ring.3000066400000000000000000000047031461424365000207520ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_setup_buf_ring 3 "Mar 07, 2023" "liburing-2.4" "liburing Manual" .SH NAME io_uring_setup_buf_ring \- setup and register buffer ring for provided buffers .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *" ring ", .BI " unsigned int " nentries ", .BI " int " bgid ", .BI " unsigned int " flags ", .BI " int *" ret ");" .fi .SH DESCRIPTION .PP The .BR io_uring_setup_buf_ring (3) function registers a shared buffer ring to be used with provided buffers. For the request types that support it, provided buffers are given to the ring and one is selected by a request if it has .B IOSQE_BUFFER_SELECT set in the SQE .IR flags , when the request is ready to receive data. This allows both clear ownership of the buffer lifetime, and a way to have more read/receive type of operations in flight than buffers available. The .I ring argument must point to the ring for which the provided buffer ring is being registered, and .I nentries is the number of entries requested in the buffer ring. This argument must be a power of 2 in size. .I bgid is the chosen buffer group ID, .I flags are modifier flags for the operation, and .I *ret is a pointer to an integer for the error value if any part of the ring allocation and registration fails. The .I flags argument is currently unused and must be set to zero. Under the covers, this function uses .BR io_uring_register_buf_ring (3) to register the ring, and handles the allocation of the ring rather than letting the application open code it. To unregister and free a buffer group ID set up with this function, the application must call .BR io_uring_free_buf_ring (3). Available since 5.19. .SH RETURN VALUE On success .BR io_uring_setup_buf_ring (3) returns a pointer to the buffer ring. On failure it returns .B NULL and sets .I *ret to -errno. .SH NOTES Note that even if the kernel supports this feature, registering a provided buffer ring may still fail with .B -EINVAL if the host is a 32-bit architecture and the memory being passed in resides in high memory. .SH SEE ALSO .BR io_uring_register_buf_ring (3), .BR io_uring_buf_ring_init (3), .BR io_uring_buf_ring_add (3), .BR io_uring_buf_ring_advance (3), .BR io_uring_buf_ring_cq_advance (3) liburing-2.6/man/io_uring_sq_ready.3000066400000000000000000000016001461424365000175370ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_sq_ready 3 "January 25, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_sq_ready \- number of unconsumed or unsubmitted entries in the SQ ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "unsigned io_uring_sq_ready(const struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_sq_ready (3) function returns the number of unconsumed (if SQPOLL) or unsubmitted entries that exist in the SQ ring belonging to the .I ring param. Usage of this function only applies if the ring has been set up with .B IORING_SETUP_SQPOLL, where request submissions, and hence consumption from the SQ ring, happens through a polling thread. .SH RETURN VALUE Returns the number of unconsumed or unsubmitted entries in the SQ ring.
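.SH EXAMPLE
.PP
A brief sketch, assuming an already initialized
.I ring
that was set up with
.B IORING_SETUP_SQPOLL :
.PP
.in +4n
.EX
struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

if (sqe) {
	io_uring_prep_nop(sqe);
	io_uring_submit(&ring);
}

/* entries the SQPOLL thread has not yet consumed */
unsigned pending = io_uring_sq_ready(&ring);
.EE
.in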
.SH SEE ALSO .BR io_uring_cq_ready (3) liburing-2.6/man/io_uring_sq_space_left.3000066400000000000000000000011611461424365000205420ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_sq_space_left 3 "January 25, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_sq_space_left \- free space in the SQ ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "unsigned io_uring_sq_space_left(const struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_sq_space_left (3) function returns how much space is left in the SQ ring belonging to the .I ring param. .SH RETURN VALUE Returns the number of available entries in the SQ ring. .SH SEE ALSO .BR io_uring_sq_ready (3) liburing-2.6/man/io_uring_sqe_set_data.3000066400000000000000000000023371461424365000203740ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_sqe_set_data 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_sqe_set_data \- set user data for submission queue event .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_sqe_set_data(struct io_uring_sqe *" sqe "," .BI " void *" user_data ");" .PP .BI "void io_uring_sqe_set_data64(struct io_uring_sqe *" sqe "," .BI " __u64 " data ");" .fi .SH DESCRIPTION .PP The .BR io_uring_sqe_set_data (3) function stores a .I user_data pointer with the submission queue entry .IR sqe . The .BR io_uring_sqe_set_data64 (3) function stores a 64-bit .I data value with the submission queue entry .IR sqe . After the caller has requested a submission queue entry (SQE) with .BR io_uring_get_sqe (3) , they can associate a data pointer or value with the SQE. Once the completion arrives, the function .BR io_uring_cqe_get_data (3) or .BR io_uring_cqe_get_data64 (3) can be called to retrieve the data pointer or value associated with the submitted request. .SH RETURN VALUE None .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_cqe_get_data (3) liburing-2.6/man/io_uring_sqe_set_data64.3000077700000000000000000000000001461424365000251472io_uring_sqe_set_data.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_sqe_set_flags.3000066400000000000000000000066771461424365000205660ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_sqe_set_flags 3 "January 25, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_sqe_set_flags \- set flags for submission queue entry .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "void io_uring_sqe_set_flags(struct io_uring_sqe *" sqe "," .BI " unsigned " flags ");" .fi .SH DESCRIPTION .PP The .BR io_uring_sqe_set_flags (3) function allows the caller to change the behavior of the submission queue entry by specifying flags. It enables the .I flags belonging to the .I sqe submission queue entry param. .I flags is a bit mask of 0 or more of the following values ORed together: .TP .B IOSQE_FIXED_FILE The file descriptor in the SQE refers to the index of a previously registered file or direct file descriptor, not a normal file descriptor. .TP .B IOSQE_ASYNC Normal operation for io_uring is to try and issue an sqe as non-blocking first, and if that fails, execute it in an async manner. To support more efficient overlapped operation of requests that the application knows/assumes will always (or most of the time) block, the application can ask for an sqe to be issued async from the start.
Note that this flag immediately causes the SQE to be offloaded to an async helper thread with no initial non-blocking attempt. This may be less efficient and should not be used liberally or without understanding the performance and efficiency tradeoffs. .TP .B IOSQE_IO_LINK When this flag is specified, the SQE forms a link with the next SQE in the submission ring. That next SQE will not be started before the previous request completes. This, in effect, forms a chain of SQEs, which can be arbitrarily long. The tail of the chain is denoted by the first SQE that does not have this flag set. Chains are not supported across submission boundaries. Even if the last SQE in a submission has this flag set, it will still terminate the current chain. This flag has no effect on previous SQE submissions, nor does it impact SQEs that are outside of the chain tail. This means that multiple chains can be executing in parallel, or chains and individual SQEs. Only members inside the chain are serialized. A chain of SQEs will be broken if any request in that chain ends in error. .TP .B IOSQE_IO_HARDLINK Like .B IOSQE_IO_LINK , except the links aren't severed if an error or unexpected result occurs. .TP .B IOSQE_IO_DRAIN When this flag is specified, the SQE will not be started before previously submitted SQEs have completed, and new SQEs will not be started before this one completes. .TP .B IOSQE_CQE_SKIP_SUCCESS Request that no CQE be generated for this request, if it completes successfully. This can be useful in cases where the application doesn't need to know when a specific request completed, if it completed successfully. .TP .B IOSQE_BUFFER_SELECT If set, and if the request type supports it, select an IO buffer from the indicated buffer group. This can be used with requests that read or receive data from a file or socket, where buffer selection is deferred until the kernel is ready to transfer data, instead of when the IO is originally submitted. The application must also set the .I buf_group field in the SQE, indicating which previously registered buffer group to select a buffer from. .SH RETURN VALUE None .SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_register (3), .BR io_uring_register_buffers (3), .BR io_uring_register_buf_ring (3) liburing-2.6/man/io_uring_sqring_wait.3000066400000000000000000000017711461424365000202700ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_sqring_wait 3 "January 25, 2022" "liburing-2.1" "liburing Manual" .SH NAME io_uring_sqring_wait \- wait for free space in the SQ ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_sqring_wait(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The function .BR io_uring_sqring_wait (3) allows the caller to wait for space to free up in the SQ ring belonging to the .I ring param, which happens when the kernel side thread has consumed one or more entries. If the SQ ring is currently non-full, no action is taken. This feature can only be used when the ring has been set up with .B IORING_SETUP_SQPOLL and hence is using an offloaded approach to request submissions. .SH RETURN VALUE On success it returns the free space. If the kernel does not support the feature, -EINVAL is returned.
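.SH EXAMPLE
.PP
A minimal sketch, assuming
.I ring
was set up with
.B IORING_SETUP_SQPOLL
: if no SQE can be retrieved because the SQ ring is full, wait for the kernel side thread to consume entries and retry:
.PP
.in +4n
.EX
struct io_uring_sqe *sqe;

sqe = io_uring_get_sqe(&ring);
if (!sqe) {
	/* SQ ring is full; wait for the SQPOLL thread to make room */
	if (io_uring_sqring_wait(&ring) >= 0)
		sqe = io_uring_get_sqe(&ring);
}
.EE
.in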
.SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_wait_cqe (3), .BR io_uring_wait_cqes (3) liburing-2.6/man/io_uring_submit.3000066400000000000000000000036031461424365000172400ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_submit 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_submit \- submit requests to the submission queue .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_submit(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_submit (3) function submits the next events to the submission queue belonging to the .IR ring . After the caller retrieves a submission queue entry (SQE) with .BR io_uring_get_sqe (3) and prepares the SQE using one of the provided helpers, it can be submitted with .BR io_uring_submit (3) . .SH RETURN VALUE On success .BR io_uring_submit (3) returns the number of submitted submission queue entries, if SQPOLL is not used. If SQPOLL is used, the return value may report a higher number of submitted entries than actually submitted. If the user requires accurate information about how many submission queue entries have been successfully submitted, while using SQPOLL, the user must fall back to repeatedly submitting a single submission queue entry. On failure it returns .BR -errno . .SH NOTES For any request that passes in data in a struct, that data must remain valid until the request has been successfully submitted. It need not remain valid until completion. Once a request has been submitted, the in-kernel state is stable. Very early kernels (5.4 and earlier) required state to be stable until the completion occurred. Applications can test for this behavior by inspecting the .B IORING_FEAT_SUBMIT_STABLE flag passed back from .BR io_uring_queue_init_params (3). In general, the man pages for the individual prep helpers will have a note mentioning this fact as well, if required for the given command. .SH SEE ALSO .BR io_uring_get_sqe (3), .BR io_uring_submit_and_wait (3), .BR io_uring_submit_and_wait_timeout (3) liburing-2.6/man/io_uring_submit_and_get_events.3000066400000000000000000000017101461424365000223020ustar00rootroot00000000000000.\" Copyright (C), 2022 dylany .\" You may distribute this file under the terms of the GNU Free .\" Documentation License. .TH io_uring_submit_and_get_events 3 "September 5, 2022" "liburing-2.3" "liburing Manual" .SH NAME io_uring_submit_and_get_events \- submit requests to the submission queue and flush completions .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_submit_and_get_events(struct io_uring *" ring ");" .fi .SH DESCRIPTION The .BR io_uring_submit_and_get_events (3) function submits the next events to the submission queue as with .BR io_uring_submit (3) . After submission it will flush CQEs as with .BR io_uring_get_events (3) . The benefit of this function is that it does both with only one system call. .SH RETURN VALUE On success .BR io_uring_submit_and_get_events (3) returns the number of submitted submission queue entries. On failure it returns .BR -errno .
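.SH EXAMPLE
.PP
A sketch of a typical event loop iteration (assuming an initialized
.I ring
and a hypothetical
.I handle_cqe
application helper), submitting pending SQEs and flushing completions with a single system call:
.PP
.in +4n
.EX
int ret = io_uring_submit_and_get_events(&ring);

if (ret >= 0) {
	struct io_uring_cqe *cqe;
	unsigned head, seen = 0;

	io_uring_for_each_cqe(&ring, head, cqe) {
		handle_cqe(cqe); /* hypothetical application helper */
		seen++;
	}
	io_uring_cq_advance(&ring, seen);
}
.EE
.in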
.SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_get_events (3) liburing-2.6/man/io_uring_submit_and_wait.3000066400000000000000000000024611461424365000211070ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_submit_and_wait 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_submit_and_wait \- submit requests to the submission queue and wait for completion .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_submit_and_wait(struct io_uring *" ring "," .BI " unsigned " wait_nr ");" .fi .SH DESCRIPTION .PP The .BR io_uring_submit_and_wait (3) function submits the next requests from the submission queue belonging to the .I ring and waits for .I wait_nr completion events. After the caller retrieves a submission queue entry (SQE) with .BR io_uring_get_sqe (3) and prepares the SQE, it can be submitted with .BR io_uring_submit_and_wait (3) . Ideally used with a ring set up with .BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN as that will greatly reduce the number of context switches that an application will see waiting on multiple requests. .SH RETURN VALUE On success .BR io_uring_submit_and_wait (3) returns the number of submitted submission queue entries. On failure it returns .BR -errno . .SH SEE ALSO .BR io_uring_queue_init_params (3), .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_submit_and_wait_timeout (3) liburing-2.6/man/io_uring_submit_and_wait_timeout.3000066400000000000000000000043661461424365000226630ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_submit_and_wait_timeout 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_submit_and_wait_timeout \- submit requests to the submission queue and wait for the completion with timeout .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_submit_and_wait_timeout(struct io_uring *" ring "," .BI " struct io_uring_cqe **" cqe_ptr "," .BI " unsigned " wait_nr "," .BI " struct __kernel_timespec *" ts "," .BI " sigset_t *" sigmask ");" .fi .SH DESCRIPTION .PP The .BR io_uring_submit_and_wait_timeout (3) function submits the next requests from the submission queue belonging to the .I ring and waits for .I wait_nr completion events, or until the timeout .I ts expires. The completion events are stored in the .I cqe_ptr array. .PP The .I sigmask specifies the set of signals to block. If set, it is equivalent to atomically executing the following calls: .PP .in +4n .EX sigset_t origmask; pthread_sigmask(SIG_SETMASK, &sigmask, &origmask); ret = io_uring_submit_and_wait_timeout(ring, cqe, wait_nr, ts, NULL); pthread_sigmask(SIG_SETMASK, &origmask, NULL); .EE .in .PP After the caller retrieves a submission queue entry (SQE) with .BR io_uring_get_sqe (3) and prepares the SQE, it can be submitted with .BR io_uring_submit_and_wait_timeout (3) . Ideally used with a ring set up with .BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN as that will greatly reduce the number of context switches that an application will see waiting on multiple requests. .SH RETURN VALUE On success .BR io_uring_submit_and_wait_timeout (3) returns the number of submitted submission queue entries. On failure it returns .BR -errno . Note that in earlier versions of the liburing library, the return value was 0 on success. The most common failure case is not receiving a completion within the specified timeout, in which case .B -ETIME is returned.
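.SH EXAMPLE
.PP
A brief sketch (assuming an initialized
.I ring
with at least one SQE already prepared), submitting and waiting up to one second for a completion:
.PP
.in +4n
.EX
struct io_uring_cqe *cqe;
struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
int ret;

ret = io_uring_submit_and_wait_timeout(&ring, &cqe, 1, &ts, NULL);
if (ret == -ETIME) {
	/* no completion arrived within the timeout */
} else if (ret >= 0) {
	/* cqe points at the first available completion */
}
.EE
.in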
.SH SEE ALSO .BR io_uring_queue_init_params (3), .BR io_uring_get_sqe (3), .BR io_uring_submit (3), .BR io_uring_submit_and_wait (3), .BR io_uring_wait_cqe (3) liburing-2.6/man/io_uring_unregister_buf_ring.3000066400000000000000000000014321461424365000217750ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_unregister_buf_ring 3 "May 18, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_unregister_buf_ring \- unregister a previously registered buffer ring .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_unregister_buf_ring(struct io_uring *" ring ", .BI " int " bgid ");" .fi .SH DESCRIPTION .PP The .BR io_uring_unregister_buf_ring (3) function unregisters a previously registered shared buffer ring indicated by .IR bgid . .SH RETURN VALUE On success .BR io_uring_unregister_buf_ring (3) returns 0. On failure it returns .BR -errno . .SH SEE ALSO .BR io_uring_register_buf_ring (3), .BR io_uring_buf_ring_free (3) liburing-2.6/man/io_uring_unregister_buffers.3000066400000000000000000000012641461424365000216410ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_unregister_buffers 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_unregister_buffers \- unregister buffers for fixed buffer operations .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_unregister_buffers(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_unregister_buffers (3) function unregisters the fixed buffers previously registered to the .IR ring . .SH RETURN VALUE On success .BR io_uring_unregister_buffers (3) returns 0. On failure it returns .BR -errno . .SH SEE ALSO .BR io_uring_register_buffers (3) liburing-2.6/man/io_uring_unregister_eventfd.3000077700000000000000000000000001461424365000271462io_uring_register_eventfd.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_unregister_files.3000066400000000000000000000012301461424365000213010ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_unregister_files 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_unregister_files \- unregister file descriptors .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_unregister_files(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP The .BR io_uring_unregister_files (3) function unregisters the file descriptors previously registered to the .IR ring . .SH RETURN VALUE On success .BR io_uring_unregister_files (3) returns 0. On failure it returns .BR -errno . .SH SEE ALSO .BR io_uring_register_files (3) liburing-2.6/man/io_uring_unregister_iowq_aff.3000077700000000000000000000000001461424365000274462io_uring_register_iowq_aff.3ustar00rootroot00000000000000liburing-2.6/man/io_uring_unregister_napi.3000066400000000000000000000013541461424365000211340ustar00rootroot00000000000000.\" Copyright (C) 2022 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_unregister_napi 3 "November 16, 2022" "liburing-2.4" "liburing Manual" .SH NAME io_uring_unregister_napi \- unregister NAPI busy poll settings .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_unregister_napi(struct io_uring *" ring "," .BI " struct io_uring_napi *" napi ");" .PP .fi .SH DESCRIPTION .PP The .BR io_uring_unregister_napi (3) function unregisters the NAPI busy poll settings for subsequent operations.
.SH RETURN VALUE On success .BR io_uring_unregister_napi (3) returns 0. On failure it returns .BR -errno . It also updates the .I napi structure with the current values. liburing-2.6/man/io_uring_unregister_ring_fd.3000066400000000000000000000017211461424365000216130ustar00rootroot00000000000000.\" Copyright (C) 2022 Jens Axboe .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_unregister_ring_fd 3 "March 11, 2022" "liburing-2.2" "liburing Manual" .SH NAME io_uring_unregister_ring_fd \- unregister a ring file descriptor .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_unregister_ring_fd(struct io_uring *" ring ");" .fi .SH DESCRIPTION .PP .BR io_uring_unregister_ring_fd (3) unregisters the ring's file descriptor, which was previously registered with the task. This is done automatically when .BR io_uring_queue_exit (3) is called, but can also be done to free up space for new ring registrations. For more information on ring descriptor registration, see .BR io_uring_register_ring_fd (3). .SH RETURN VALUE Returns 1 on success, indicating that one file descriptor was unregistered, or .BR -errno on error. .SH SEE ALSO .BR io_uring_register_ring_fd (3), .BR io_uring_register_files (3) liburing-2.6/man/io_uring_wait_cqe.3000066400000000000000000000022631461424365000175320ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_wait_cqe 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_wait_cqe \- wait for one io_uring completion event .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_wait_cqe(struct io_uring *" ring "," .BI " struct io_uring_cqe **" cqe_ptr ");" .fi .SH DESCRIPTION .PP The .BR io_uring_wait_cqe (3) function returns an IO completion from the queue belonging to the .I ring param, waiting for it if necessary. If an event is already available in the ring when invoked, no waiting will occur. The .I cqe_ptr param is filled in on success. After the caller has submitted a request with .BR io_uring_submit (3), the application can retrieve the completion with .BR io_uring_wait_cqe (3). .SH RETURN VALUE On success .BR io_uring_wait_cqe (3) returns 0 and the cqe_ptr param is filled in. On failure it returns .BR -errno . The return value indicates the result of waiting for a CQE, and it has no relation to the CQE result itself. .SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_wait_cqe_timeout (3), .BR io_uring_wait_cqes (3) liburing-2.6/man/io_uring_wait_cqe_nr.3000066400000000000000000000027771461424365000202370ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_wait_cqe_nr 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_wait_cqe_nr \- wait for one or more io_uring completion events .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_wait_cqe_nr(struct io_uring *" ring "," .BI " struct io_uring_cqe **" cqe_ptr "," .BI " unsigned " wait_nr ");" .fi .SH DESCRIPTION .PP The .BR io_uring_wait_cqe_nr (3) function returns .I wait_nr IO completion events from the queue belonging to the .I ring param, waiting for them if necessary. If the requested number of events are already available in the ring when invoked, no waiting will occur. The .I cqe_ptr param is filled in on success. After the caller has submitted a request with .BR io_uring_submit (3), the application can retrieve the completion with .BR io_uring_wait_cqe (3).
Ideally used with a ring set up with .BR IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN as that will greatly reduce the number of context switches that an application will see waiting on multiple requests. .SH RETURN VALUE On success .BR io_uring_wait_cqe_nr (3) returns 0 and the cqe_ptr param is filled in. On failure it returns .BR -errno . The return value indicates the result of waiting for a CQE, and it has no relation to the CQE result itself. .SH SEE ALSO .BR io_uring_queue_init_params (3), .BR io_uring_submit (3), .BR io_uring_wait_cqes (3) liburing-2.6/man/io_uring_wait_cqe_timeout.3000066400000000000000000000027601461424365000213020ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_wait_cqe_timeout 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_wait_cqe_timeout \- wait for one io_uring completion event with timeout .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_wait_cqe_timeout(struct io_uring *" ring "," .BI " struct io_uring_cqe **" cqe_ptr "," .BI " struct __kernel_timespec *" ts ");" .fi .SH DESCRIPTION .PP The .BR io_uring_wait_cqe_timeout (3) function waits for one IO completion to be available from the queue belonging to the .I ring param, waiting for it if necessary, or until the timeout .I ts expires. If an event is already available in the ring when invoked, no waiting will occur. The .I cqe_ptr param is filled in on success. If .I ts is specified and an older kernel without .B IORING_FEAT_EXT_ARG is used, the application does not need to call .BR io_uring_submit (3) before calling .BR io_uring_wait_cqes (3). For newer kernels with that feature flag set, there is no implied submit when waiting for a request. .SH RETURN VALUE On success .BR io_uring_wait_cqe_timeout (3) returns 0 and the cqe_ptr param is filled in. On failure it returns .BR -errno . The return value indicates the result of waiting for a CQE, and it has no relation to the CQE result itself. .SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_wait_cqes (3), .BR io_uring_wait_cqe (3) liburing-2.6/man/io_uring_wait_cqes.3000066400000000000000000000034751461424365000177230ustar00rootroot00000000000000.\" Copyright (C) 2021 Stefan Roesch .\" .\" SPDX-License-Identifier: LGPL-2.0-or-later .\" .TH io_uring_wait_cqes 3 "November 15, 2021" "liburing-2.1" "liburing Manual" .SH NAME io_uring_wait_cqes \- wait for one or more io_uring completion events .SH SYNOPSIS .nf .B #include <liburing.h> .PP .BI "int io_uring_wait_cqes(struct io_uring *" ring "," .BI " struct io_uring_cqe **" cqe_ptr "," .BI " unsigned " wait_nr "," .BI " struct __kernel_timespec *" ts "," .BI " sigset_t *" sigmask ");" .fi .SH DESCRIPTION .PP The .BR io_uring_wait_cqes (3) function returns .I wait_nr IO completions from the queue belonging to the .I ring param, waiting for them if necessary or until the timeout .I ts expires. .PP The .I sigmask specifies the set of signals to block. If set, it is equivalent to atomically executing the following calls: .PP .in +4n .EX sigset_t origmask; pthread_sigmask(SIG_SETMASK, &sigmask, &origmask); ret = io_uring_wait_cqes(ring, cqe, wait_nr, ts, NULL); pthread_sigmask(SIG_SETMASK, &origmask, NULL); .EE .in .PP The .I cqe_ptr param is filled in on success with the first CQE. Callers of this function should use .BR io_uring_for_each_cqe (3) to iterate all available CQEs.
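For example, a caller might wait for a batch and then drain everything available (a sketch, assuming an initialized
.I ring
and a hypothetical
.I handle_cqe
application helper):
.PP
.in +4n
.EX
struct io_uring_cqe *cqe;
unsigned head, seen = 0;

if (!io_uring_wait_cqes(&ring, &cqe, 8, NULL, NULL)) {
	io_uring_for_each_cqe(&ring, head, cqe) {
		handle_cqe(cqe); /* hypothetical application helper */
		seen++;
	}
	io_uring_cq_advance(&ring, seen);
}
.EE
.in
.PP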
If .I ts is specified and an older kernel without .B IORING_FEAT_EXT_ARG is used, the application does not need to call .BR io_uring_submit (3) before calling .BR io_uring_wait_cqes (3). For newer kernels with that feature flag set, there is no implied submit when waiting for a request. .SH RETURN VALUE On success .BR io_uring_wait_cqes (3) returns 0 and the cqe_ptr param is filled in. On failure it returns .BR -errno . .SH SEE ALSO .BR io_uring_submit (3), .BR io_uring_for_each_cqe (3), .BR io_uring_wait_cqe_timeout (3), .BR io_uring_wait_cqe (3) liburing-2.6/src/000077500000000000000000000000001461424365000137705ustar00rootroot00000000000000liburing-2.6/src/Makefile000066400000000000000000000103111461424365000154240ustar00rootroot00000000000000include ../Makefile.common prefix ?= /usr includedir ?= $(prefix)/include libdir ?= $(prefix)/lib libdevdir ?= $(prefix)/lib LIBURING_CFLAGS ?= CPPFLAGS ?= override CPPFLAGS += -D_GNU_SOURCE \ -Iinclude/ -include ../config-host.h \ -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 CFLAGS ?= -O3 -Wall -Wextra -fno-stack-protector override CFLAGS += -Wno-unused-parameter \ -DLIBURING_INTERNAL \ $(LIBURING_CFLAGS) SO_CFLAGS=-fPIC $(CFLAGS) L_CFLAGS=$(CFLAGS) LINK_FLAGS=-Wl,-z,defs LINK_FLAGS+=$(LDFLAGS) ENABLE_SHARED ?= 1 soname=liburing.so.$(VERSION_MAJOR) libname=liburing.so.$(VERSION) ffi_soname=liburing-ffi.so.$(VERSION_MAJOR) ffi_libname=liburing-ffi.so.$(VERSION) all_targets += liburing.a all_targets += liburing-ffi.a ifeq ($(ENABLE_SHARED),1) all_targets += $(libname) all_targets += $(ffi_libname) endif include ../Makefile.quiet ifneq ($(MAKECMDGOALS),clean) include ../config-host.mak endif all: $(all_targets) liburing_srcs := setup.c queue.c register.c syscall.c version.c ifeq ($(CONFIG_NOLIBC),y) liburing_srcs += nolibc.c override CFLAGS += -nostdlib -nodefaultlibs -ffreestanding -fno-builtin -fno-stack-protector override CPPFLAGS += -nostdlib -nodefaultlibs -ffreestanding -fno-builtin -fno-stack-protector override LINK_FLAGS += -nostdlib -nodefaultlibs $(libgcc_link_flag) endif override CPPFLAGS += -MT "$@" -MMD -MP -MF "$@.d" liburing_objs := $(patsubst %.c,%.ol,$(liburing_srcs)) liburing_sobjs := $(patsubst %.c,%.os,$(liburing_srcs)) liburing_ffi_objs := ffi.ol liburing_ffi_sobjs := ffi.os %.os: %.c $(QUIET_CC)$(CC) $(CPPFLAGS) $(SO_CFLAGS) -c -o $@ $< %.ol: %.c $(QUIET_CC)$(CC) $(CPPFLAGS) $(L_CFLAGS) -c -o $@ $< # Include compiler generated dependency files. 
-include $(liburing_objs:%=%.d) -include $(liburing_sobjs:%=%.d) AR ?= ar RANLIB ?= ranlib liburing.a: $(liburing_objs) @rm -f liburing.a $(QUIET_AR)$(AR) r liburing.a $^ $(QUIET_RANLIB)$(RANLIB) liburing.a liburing-ffi.a: $(liburing_objs) $(liburing_ffi_objs) @rm -f liburing-ffi.a $(QUIET_AR)$(AR) r liburing-ffi.a $^ $(QUIET_RANLIB)$(RANLIB) liburing-ffi.a $(libname): $(liburing_sobjs) liburing.map $(QUIET_CC)$(CC) $(SO_CFLAGS) -shared -Wl,--version-script=liburing.map -Wl,-soname=$(soname) -o $@ $(liburing_sobjs) $(LINK_FLAGS) $(ffi_libname): $(liburing_ffi_objs) $(liburing_ffi_sobjs) $(liburing_sobjs) liburing-ffi.map $(QUIET_CC)$(CC) $(SO_CFLAGS) -shared -Wl,--version-script=liburing-ffi.map -Wl,-soname=$(ffi_soname) -o $@ $(liburing_sobjs) $(liburing_ffi_sobjs) $(LINK_FLAGS) install: $(all_targets) install -D -m 644 include/liburing/io_uring.h $(includedir)/liburing/io_uring.h install -D -m 644 include/liburing.h $(includedir)/liburing.h install -D -m 644 include/liburing/compat.h $(includedir)/liburing/compat.h install -D -m 644 include/liburing/barrier.h $(includedir)/liburing/barrier.h install -D -m 644 include/liburing/io_uring_version.h $(includedir)/liburing/io_uring_version.h install -D -m 644 liburing.a $(libdevdir)/liburing.a install -D -m 644 liburing-ffi.a $(libdevdir)/liburing-ffi.a ifeq ($(ENABLE_SHARED),1) install -D -m 755 $(libname) $(libdir)/$(libname) install -D -m 755 $(ffi_libname) $(libdir)/$(ffi_libname) ln -sf $(libname) $(libdir)/$(soname) ln -sf $(relativelibdir)$(libname) $(libdevdir)/liburing.so ln -sf $(ffi_libname) $(libdir)/$(ffi_soname) ln -sf $(relativelibdir)$(ffi_libname) $(libdevdir)/liburing-ffi.so endif uninstall: @rm -f $(includedir)/liburing/io_uring.h @rm -f $(includedir)/liburing.h @rm -f $(includedir)/liburing/compat.h @rm -f $(includedir)/liburing/barrier.h @rm -f $(includedir)/liburing/io_uring_version.h @rm -f $(libdevdir)/liburing.a @rm -f $(libdevdir)/liburing-ffi.a ifeq ($(ENABLE_SHARED),1) @rm -f $(libdir)/$(libname) @rm -f $(libdir)/$(ffi_libname) @rm -f $(libdir)/$(soname) @rm -f $(libdevdir)/liburing.so @rm -f $(libdir)/$(ffi_soname) @rm -f $(libdevdir)/liburing-ffi.so endif clean: @rm -f $(all_targets) $(liburing_objs) $(liburing_sobjs) $(liburing_ffi_objs) $(liburing_ffi_sobjs) $(soname).new @rm -f *.so* *.a *.o *.d @rm -f include/liburing/compat.h @rm -f include/liburing/io_uring_version.h @# When cleaning, we don't include ../config-host.mak, @# so the nolibc objects are always skipped, clean them up! 
@rm -f nolibc.ol nolibc.os liburing-2.6/src/arch/000077500000000000000000000000001461424365000147055ustar00rootroot00000000000000liburing-2.6/src/arch/aarch64/000077500000000000000000000000001461424365000161355ustar00rootroot00000000000000liburing-2.6/src/arch/aarch64/lib.h000066400000000000000000000013361461424365000170570ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_ARCH_AARCH64_LIB_H #define LIBURING_ARCH_AARCH64_LIB_H #include #include "../../syscall.h" static inline long __get_page_size(void) { Elf64_Off buf[2]; long ret = 4096; int fd; fd = __sys_open("/proc/self/auxv", O_RDONLY, 0); if (fd < 0) return ret; while (1) { ssize_t x; x = __sys_read(fd, buf, sizeof(buf)); if (x < (long) sizeof(buf)) break; if (buf[0] == AT_PAGESZ) { ret = buf[1]; break; } } __sys_close(fd); return ret; } static inline long get_page_size(void) { static long cache_val; if (cache_val) return cache_val; cache_val = __get_page_size(); return cache_val; } #endif /* #ifndef LIBURING_ARCH_AARCH64_LIB_H */ liburing-2.6/src/arch/aarch64/syscall.h000066400000000000000000000054511461424365000177650ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_ARCH_AARCH64_SYSCALL_H #define LIBURING_ARCH_AARCH64_SYSCALL_H #if defined(__aarch64__) #define __do_syscallN(...) ({ \ __asm__ volatile ( \ "svc 0" \ : "=r"(x0) \ : __VA_ARGS__ \ : "memory", "cc"); \ (long) x0; \ }) #define __do_syscall0(__n) ({ \ register long x8 __asm__("x8") = __n; \ register long x0 __asm__("x0"); \ \ __do_syscallN("r" (x8)); \ }) #define __do_syscall1(__n, __a) ({ \ register long x8 __asm__("x8") = __n; \ register __typeof__(__a) x0 __asm__("x0") = __a; \ \ __do_syscallN("r" (x8), "0" (x0)); \ }) #define __do_syscall2(__n, __a, __b) ({ \ register long x8 __asm__("x8") = __n; \ register __typeof__(__a) x0 __asm__("x0") = __a; \ register __typeof__(__b) x1 __asm__("x1") = __b; \ \ __do_syscallN("r" (x8), "0" (x0), "r" (x1)); \ }) #define __do_syscall3(__n, __a, __b, __c) ({ \ register long x8 __asm__("x8") = __n; \ register __typeof__(__a) x0 __asm__("x0") = __a; \ register __typeof__(__b) x1 __asm__("x1") = __b; \ register __typeof__(__c) x2 __asm__("x2") = __c; \ \ __do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2)); \ }) #define __do_syscall4(__n, __a, __b, __c, __d) ({ \ register long x8 __asm__("x8") = __n; \ register __typeof__(__a) x0 __asm__("x0") = __a; \ register __typeof__(__b) x1 __asm__("x1") = __b; \ register __typeof__(__c) x2 __asm__("x2") = __c; \ register __typeof__(__d) x3 __asm__("x3") = __d; \ \ __do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2), "r" (x3));\ }) #define __do_syscall5(__n, __a, __b, __c, __d, __e) ({ \ register long x8 __asm__("x8") = __n; \ register __typeof__(__a) x0 __asm__("x0") = __a; \ register __typeof__(__b) x1 __asm__("x1") = __b; \ register __typeof__(__c) x2 __asm__("x2") = __c; \ register __typeof__(__d) x3 __asm__("x3") = __d; \ register __typeof__(__e) x4 __asm__("x4") = __e; \ \ __do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2), "r" (x3), \ "r"(x4)); \ }) #define __do_syscall6(__n, __a, __b, __c, __d, __e, __f) ({ \ register long x8 __asm__("x8") = __n; \ register __typeof__(__a) x0 __asm__("x0") = __a; \ register __typeof__(__b) x1 __asm__("x1") = __b; \ register __typeof__(__c) x2 __asm__("x2") = __c; \ register __typeof__(__d) x3 __asm__("x3") = __d; \ register __typeof__(__e) x4 __asm__("x4") = __e; \ register __typeof__(__f) x5 __asm__("x5") = __f; \ \ __do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2), "r" (x3), \ "r" (x4), 
"r"(x5)); \ }) #include "../syscall-defs.h" #else /* #if defined(__aarch64__) */ #include "../generic/syscall.h" #endif /* #if defined(__aarch64__) */ #endif /* #ifndef LIBURING_ARCH_AARCH64_SYSCALL_H */ liburing-2.6/src/arch/generic/000077500000000000000000000000001461424365000163215ustar00rootroot00000000000000liburing-2.6/src/arch/generic/lib.h000066400000000000000000000004741461424365000172450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_ARCH_GENERIC_LIB_H #define LIBURING_ARCH_GENERIC_LIB_H static inline long get_page_size(void) { long page_size; page_size = sysconf(_SC_PAGESIZE); if (page_size < 0) page_size = 4096; return page_size; } #endif /* #ifndef LIBURING_ARCH_GENERIC_LIB_H */ liburing-2.6/src/arch/generic/syscall.h000066400000000000000000000046231461424365000201510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_ARCH_GENERIC_SYSCALL_H #define LIBURING_ARCH_GENERIC_SYSCALL_H #include static inline int __sys_io_uring_register(unsigned int fd, unsigned int opcode, const void *arg, unsigned int nr_args) { int ret; ret = syscall(__NR_io_uring_register, fd, opcode, arg, nr_args); return (ret < 0) ? -errno : ret; } static inline int __sys_io_uring_setup(unsigned int entries, struct io_uring_params *p) { int ret; ret = syscall(__NR_io_uring_setup, entries, p); return (ret < 0) ? -errno : ret; } static inline int __sys_io_uring_enter2(unsigned int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig, size_t sz) { int ret; ret = syscall(__NR_io_uring_enter, fd, to_submit, min_complete, flags, sig, sz); return (ret < 0) ? -errno : ret; } static inline int __sys_io_uring_enter(unsigned int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig) { return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig, _NSIG / 8); } static inline int __sys_open(const char *pathname, int flags, mode_t mode) { int ret; ret = open(pathname, flags, mode); return (ret < 0) ? -errno : ret; } static inline ssize_t __sys_read(int fd, void *buffer, size_t size) { ssize_t ret; ret = read(fd, buffer, size); return (ret < 0) ? -errno : ret; } static inline void *__sys_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { void *ret; ret = mmap(addr, length, prot, flags, fd, offset); return (ret == MAP_FAILED) ? ERR_PTR(-errno) : ret; } static inline int __sys_munmap(void *addr, size_t length) { int ret; ret = munmap(addr, length); return (ret < 0) ? -errno : ret; } static inline int __sys_madvise(void *addr, size_t length, int advice) { int ret; ret = madvise(addr, length, advice); return (ret < 0) ? -errno : ret; } static inline int __sys_getrlimit(int resource, struct rlimit *rlim) { int ret; ret = getrlimit(resource, rlim); return (ret < 0) ? -errno : ret; } static inline int __sys_setrlimit(int resource, const struct rlimit *rlim) { int ret; ret = setrlimit(resource, rlim); return (ret < 0) ? -errno : ret; } static inline int __sys_close(int fd) { int ret; ret = close(fd); return (ret < 0) ? 
-errno : ret; } #endif /* #ifndef LIBURING_ARCH_GENERIC_SYSCALL_H */ liburing-2.6/src/arch/riscv64/000077500000000000000000000000001461424365000162055ustar00rootroot00000000000000liburing-2.6/src/arch/riscv64/lib.h000066400000000000000000000013641461424365000171300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_ARCH_RISCV64_LIB_H #define LIBURING_ARCH_RISCV64_LIB_H #include #include #include "../../syscall.h" static inline long __get_page_size(void) { Elf64_Off buf[2]; long ret = 4096; int fd; fd = __sys_open("/proc/self/auxv", O_RDONLY, 0); if (fd < 0) return ret; while (1) { ssize_t x; x = __sys_read(fd, buf, sizeof(buf)); if (x < (long) sizeof(buf)) break; if (buf[0] == AT_PAGESZ) { ret = buf[1]; break; } } __sys_close(fd); return ret; } static inline long get_page_size(void) { static long cache_val; if (cache_val) return cache_val; cache_val = __get_page_size(); return cache_val; } #endif /* #ifndef LIBURING_ARCH_RISCV64_LIB_H */ liburing-2.6/src/arch/riscv64/syscall.h000066400000000000000000000060121461424365000200270ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_ARCH_RISCV64_SYSCALL_H #define LIBURING_ARCH_RISCV64_SYSCALL_H #if defined(__riscv) && __riscv_xlen == 64 #define __do_syscallM(...) ({ \ __asm__ volatile ( \ "ecall" \ : "=r"(a0) \ : __VA_ARGS__ \ : "memory", "a1"); \ (long) a0; \ }) #define __do_syscallN(...) ({ \ __asm__ volatile ( \ "ecall" \ : "=r"(a0) \ : __VA_ARGS__ \ : "memory"); \ (long) a0; \ }) #define __do_syscall0(__n) ({ \ register long a7 __asm__("a7") = __n; \ register long a0 __asm__("a0"); \ \ __do_syscallM("r" (a7)); \ }) #define __do_syscall1(__n, __a) ({ \ register long a7 __asm__("a7") = __n; \ register __typeof__(__a) a0 __asm__("a0") = __a; \ \ __do_syscallM("r" (a7), "0" (a0)); \ }) #define __do_syscall2(__n, __a, __b) ({ \ register long a7 __asm__("a7") = __n; \ register __typeof__(__a) a0 __asm__("a0") = __a; \ register __typeof__(__b) a1 __asm__("a1") = __b; \ \ __do_syscallN("r" (a7), "0" (a0), "r" (a1)); \ }) #define __do_syscall3(__n, __a, __b, __c) ({ \ register long a7 __asm__("a7") = __n; \ register __typeof__(__a) a0 __asm__("a0") = __a; \ register __typeof__(__b) a1 __asm__("a1") = __b; \ register __typeof__(__c) a2 __asm__("a2") = __c; \ \ __do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2)); \ }) #define __do_syscall4(__n, __a, __b, __c, __d) ({ \ register long a7 __asm__("a7") = __n; \ register __typeof__(__a) a0 __asm__("a0") = __a; \ register __typeof__(__b) a1 __asm__("a1") = __b; \ register __typeof__(__c) a2 __asm__("a2") = __c; \ register __typeof__(__d) a3 __asm__("a3") = __d; \ \ __do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2), "r" (a3));\ }) #define __do_syscall5(__n, __a, __b, __c, __d, __e) ({ \ register long a7 __asm__("a7") = __n; \ register __typeof__(__a) a0 __asm__("a0") = __a; \ register __typeof__(__b) a1 __asm__("a1") = __b; \ register __typeof__(__c) a2 __asm__("a2") = __c; \ register __typeof__(__d) a3 __asm__("a3") = __d; \ register __typeof__(__e) a4 __asm__("a4") = __e; \ \ __do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2), "r" (a3), \ "r"(a4)); \ }) #define __do_syscall6(__n, __a, __b, __c, __d, __e, __f) ({ \ register long a7 __asm__("a7") = __n; \ register __typeof__(__a) a0 __asm__("a0") = __a; \ register __typeof__(__b) a1 __asm__("a1") = __b; \ register __typeof__(__c) a2 __asm__("a2") = __c; \ register __typeof__(__d) a3 __asm__("a3") = __d; \ register __typeof__(__e) a4 __asm__("a4") = __e; \ register __typeof__(__f) a5 
__asm__("a5") = __f; \ \ __do_syscallN("r" (a7), "0" (a0), "r" (a1), "r" (a2), "r" (a3), \ "r" (a4), "r"(a5)); \ }) #include "../syscall-defs.h" #else /* #if defined(__riscv) && __riscv_xlen == 64 */ #include "../generic/syscall.h" #endif /* #if defined(__riscv) && __riscv_xlen == 64 */ #endif /* #ifndef LIBURING_ARCH_RISCV64_SYSCALL_H */ liburing-2.6/src/arch/syscall-defs.h000066400000000000000000000045711461424365000174560ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_ARCH_SYSCALL_DEFS_H #define LIBURING_ARCH_SYSCALL_DEFS_H #include static inline int __sys_open(const char *pathname, int flags, mode_t mode) { /* * Some architectures don't have __NR_open, but __NR_openat. */ #ifdef __NR_open return (int) __do_syscall3(__NR_open, pathname, flags, mode); #else return (int) __do_syscall4(__NR_openat, AT_FDCWD, pathname, flags, mode); #endif } static inline ssize_t __sys_read(int fd, void *buffer, size_t size) { return (ssize_t) __do_syscall3(__NR_read, fd, buffer, size); } static inline void *__sys_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { int nr; #if defined(__NR_mmap2) nr = __NR_mmap2; offset >>= 12; #else nr = __NR_mmap; #endif return (void *) __do_syscall6(nr, addr, length, prot, flags, fd, offset); } static inline int __sys_munmap(void *addr, size_t length) { return (int) __do_syscall2(__NR_munmap, addr, length); } static inline int __sys_madvise(void *addr, size_t length, int advice) { return (int) __do_syscall3(__NR_madvise, addr, length, advice); } static inline int __sys_getrlimit(int resource, struct rlimit *rlim) { return (int) __do_syscall2(__NR_getrlimit, resource, rlim); } static inline int __sys_setrlimit(int resource, const struct rlimit *rlim) { return (int) __do_syscall2(__NR_setrlimit, resource, rlim); } static inline int __sys_close(int fd) { return (int) __do_syscall1(__NR_close, fd); } static inline int __sys_io_uring_register(unsigned int fd, unsigned int opcode, const void *arg, unsigned int nr_args) { return (int) __do_syscall4(__NR_io_uring_register, fd, opcode, arg, nr_args); } static inline int __sys_io_uring_setup(unsigned int entries, struct io_uring_params *p) { return (int) __do_syscall2(__NR_io_uring_setup, entries, p); } static inline int __sys_io_uring_enter2(unsigned int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig, size_t sz) { return (int) __do_syscall6(__NR_io_uring_enter, fd, to_submit, min_complete, flags, sig, sz); } static inline int __sys_io_uring_enter(unsigned int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig) { return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig, _NSIG / 8); } #endif liburing-2.6/src/arch/x86/000077500000000000000000000000001461424365000153325ustar00rootroot00000000000000liburing-2.6/src/arch/x86/lib.h000066400000000000000000000003141461424365000162470ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_ARCH_X86_LIB_H #define LIBURING_ARCH_X86_LIB_H static inline long get_page_size(void) { return 4096; } #endif /* #ifndef LIBURING_ARCH_X86_LIB_H */ liburing-2.6/src/arch/x86/syscall.h000066400000000000000000000160731461424365000171640ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_ARCH_X86_SYSCALL_H #define LIBURING_ARCH_X86_SYSCALL_H #if defined(__x86_64__) /** * Note for syscall registers usage (x86-64): * - %rax is the syscall number. * - %rax is also the return value. * - %rdi is the 1st argument. 
* - %rsi is the 2nd argument. * - %rdx is the 3rd argument. * - %r10 is the 4th argument (**yes it's %r10, not %rcx!**). * - %r8 is the 5th argument. * - %r9 is the 6th argument. * * `syscall` instruction will clobber %r11 and %rcx. * * After the syscall returns to userspace: * - %r11 will contain %rflags. * - %rcx will contain the return address. * * IOW, after the syscall returns to userspace: * %r11 == %rflags and %rcx == %rip. */ #define __do_syscall0(NUM) ({ \ intptr_t rax; \ \ __asm__ volatile( \ "syscall" \ : "=a"(rax) /* %rax */ \ : "a"(NUM) /* %rax */ \ : "rcx", "r11", "memory" \ ); \ rax; \ }) #define __do_syscall1(NUM, ARG1) ({ \ intptr_t rax; \ \ __asm__ volatile( \ "syscall" \ : "=a"(rax) /* %rax */ \ : "a"((NUM)), /* %rax */ \ "D"((ARG1)) /* %rdi */ \ : "rcx", "r11", "memory" \ ); \ rax; \ }) #define __do_syscall2(NUM, ARG1, ARG2) ({ \ intptr_t rax; \ \ __asm__ volatile( \ "syscall" \ : "=a"(rax) /* %rax */ \ : "a"((NUM)), /* %rax */ \ "D"((ARG1)), /* %rdi */ \ "S"((ARG2)) /* %rsi */ \ : "rcx", "r11", "memory" \ ); \ rax; \ }) #define __do_syscall3(NUM, ARG1, ARG2, ARG3) ({ \ intptr_t rax; \ \ __asm__ volatile( \ "syscall" \ : "=a"(rax) /* %rax */ \ : "a"((NUM)), /* %rax */ \ "D"((ARG1)), /* %rdi */ \ "S"((ARG2)), /* %rsi */ \ "d"((ARG3)) /* %rdx */ \ : "rcx", "r11", "memory" \ ); \ rax; \ }) #define __do_syscall4(NUM, ARG1, ARG2, ARG3, ARG4) ({ \ intptr_t rax; \ register __typeof__(ARG4) __r10 __asm__("r10") = (ARG4); \ \ __asm__ volatile( \ "syscall" \ : "=a"(rax) /* %rax */ \ : "a"((NUM)), /* %rax */ \ "D"((ARG1)), /* %rdi */ \ "S"((ARG2)), /* %rsi */ \ "d"((ARG3)), /* %rdx */ \ "r"(__r10) /* %r10 */ \ : "rcx", "r11", "memory" \ ); \ rax; \ }) #define __do_syscall5(NUM, ARG1, ARG2, ARG3, ARG4, ARG5) ({ \ intptr_t rax; \ register __typeof__(ARG4) __r10 __asm__("r10") = (ARG4); \ register __typeof__(ARG5) __r8 __asm__("r8") = (ARG5); \ \ __asm__ volatile( \ "syscall" \ : "=a"(rax) /* %rax */ \ : "a"((NUM)), /* %rax */ \ "D"((ARG1)), /* %rdi */ \ "S"((ARG2)), /* %rsi */ \ "d"((ARG3)), /* %rdx */ \ "r"(__r10), /* %r10 */ \ "r"(__r8) /* %r8 */ \ : "rcx", "r11", "memory" \ ); \ rax; \ }) #define __do_syscall6(NUM, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6) ({ \ intptr_t rax; \ register __typeof__(ARG4) __r10 __asm__("r10") = (ARG4); \ register __typeof__(ARG5) __r8 __asm__("r8") = (ARG5); \ register __typeof__(ARG6) __r9 __asm__("r9") = (ARG6); \ \ __asm__ volatile( \ "syscall" \ : "=a"(rax) /* %rax */ \ : "a"((NUM)), /* %rax */ \ "D"((ARG1)), /* %rdi */ \ "S"((ARG2)), /* %rsi */ \ "d"((ARG3)), /* %rdx */ \ "r"(__r10), /* %r10 */ \ "r"(__r8), /* %r8 */ \ "r"(__r9) /* %r9 */ \ : "rcx", "r11", "memory" \ ); \ rax; \ }) #include "../syscall-defs.h" #else /* #if defined(__x86_64__) */ #ifdef CONFIG_NOLIBC /** * Note for syscall registers usage (x86, 32-bit): * - %eax is the syscall number. * - %eax is also the return value. * - %ebx is the 1st argument. * - %ecx is the 2nd argument. * - %edx is the 3rd argument. * - %esi is the 4th argument. * - %edi is the 5th argument. * - %ebp is the 6th argument. 
*/ #define __do_syscall0(NUM) ({ \ intptr_t eax; \ \ __asm__ volatile( \ "int $0x80" \ : "=a"(eax) /* %eax */ \ : "a"(NUM) /* %eax */ \ : "memory" \ ); \ eax; \ }) #define __do_syscall1(NUM, ARG1) ({ \ intptr_t eax; \ \ __asm__ volatile( \ "int $0x80" \ : "=a"(eax) /* %eax */ \ : "a"(NUM), /* %eax */ \ "b"((ARG1)) /* %ebx */ \ : "memory" \ ); \ eax; \ }) #define __do_syscall2(NUM, ARG1, ARG2) ({ \ intptr_t eax; \ \ __asm__ volatile( \ "int $0x80" \ : "=a" (eax) /* %eax */ \ : "a"(NUM), /* %eax */ \ "b"((ARG1)), /* %ebx */ \ "c"((ARG2)) /* %ecx */ \ : "memory" \ ); \ eax; \ }) #define __do_syscall3(NUM, ARG1, ARG2, ARG3) ({ \ intptr_t eax; \ \ __asm__ volatile( \ "int $0x80" \ : "=a" (eax) /* %eax */ \ : "a"(NUM), /* %eax */ \ "b"((ARG1)), /* %ebx */ \ "c"((ARG2)), /* %ecx */ \ "d"((ARG3)) /* %edx */ \ : "memory" \ ); \ eax; \ }) #define __do_syscall4(NUM, ARG1, ARG2, ARG3, ARG4) ({ \ intptr_t eax; \ \ __asm__ volatile( \ "int $0x80" \ : "=a" (eax) /* %eax */ \ : "a"(NUM), /* %eax */ \ "b"((ARG1)), /* %ebx */ \ "c"((ARG2)), /* %ecx */ \ "d"((ARG3)), /* %edx */ \ "S"((ARG4)) /* %esi */ \ : "memory" \ ); \ eax; \ }) #define __do_syscall5(NUM, ARG1, ARG2, ARG3, ARG4, ARG5) ({ \ intptr_t eax; \ \ __asm__ volatile( \ "int $0x80" \ : "=a" (eax) /* %eax */ \ : "a"(NUM), /* %eax */ \ "b"((ARG1)), /* %ebx */ \ "c"((ARG2)), /* %ecx */ \ "d"((ARG3)), /* %edx */ \ "S"((ARG4)), /* %esi */ \ "D"((ARG5)) /* %edi */ \ : "memory" \ ); \ eax; \ }) /* * On i386, the 6th argument of syscall goes in %ebp. However, both Clang * and GCC cannot use %ebp in the clobber list and in the "r" constraint * without using -fomit-frame-pointer. To make it always available for * any kind of compilation, the below workaround is implemented: * * 1) Push the 6-th argument. * 2) Push %ebp. * 3) Load the 6-th argument from 4(%esp) to %ebp. * 4) Do the syscall (int $0x80). * 5) Pop %ebp (restore the old value of %ebp). * 6) Add %esp by 4 (undo the stack pointer). * * WARNING: * Don't use register variables for __do_syscall6(), there is a known * GCC bug that results in an endless loop. 
* * BugLink: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105032 * */ #define __do_syscall6(NUM, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6) ({ \ intptr_t eax = (intptr_t)(NUM); \ intptr_t arg6 = (intptr_t)(ARG6); /* Always in memory */ \ __asm__ volatile ( \ "pushl %[_arg6]\n\t" \ "pushl %%ebp\n\t" \ "movl 4(%%esp),%%ebp\n\t" \ "int $0x80\n\t" \ "popl %%ebp\n\t" \ "addl $4,%%esp" \ : "+a"(eax) /* %eax */ \ : "b"(ARG1), /* %ebx */ \ "c"(ARG2), /* %ecx */ \ "d"(ARG3), /* %edx */ \ "S"(ARG4), /* %esi */ \ "D"(ARG5), /* %edi */ \ [_arg6]"m"(arg6) /* memory */ \ : "memory", "cc" \ ); \ eax; \ }) #include "../syscall-defs.h" #else /* #ifdef CONFIG_NOLIBC */ #include "../generic/syscall.h" #endif /* #ifdef CONFIG_NOLIBC */ #endif /* #if defined(__x86_64__) */ #endif /* #ifndef LIBURING_ARCH_X86_SYSCALL_H */ liburing-2.6/src/ffi.c000066400000000000000000000006151461424365000147020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #define IOURINGINLINE #ifdef __clang__ // clang doesn't seem to particularly like that we're including a header that // deliberately contains function definitions, so we explicitly silence it #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wmissing-prototypes" #endif #include "liburing.h" #ifdef __clang__ #pragma clang diagnostic pop #endif liburing-2.6/src/include/000077500000000000000000000000001461424365000154135ustar00rootroot00000000000000liburing-2.6/src/include/liburing.h000066400000000000000000001302121461424365000173760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIB_URING_H #define LIB_URING_H #include <sys/socket.h> #include <sys/stat.h> #include <sys/uio.h> #include <errno.h> #include <signal.h> #include <stdbool.h> #include <inttypes.h> #include <time.h> #include <fcntl.h> #include <sched.h> #include <linux/swab.h> #include <sys/wait.h> #include "liburing/compat.h" #include "liburing/io_uring.h" #include "liburing/io_uring_version.h" #include "liburing/barrier.h" #ifndef uring_unlikely #define uring_unlikely(cond) __builtin_expect(!!(cond), 0) #endif #ifndef uring_likely #define uring_likely(cond) __builtin_expect(!!(cond), 1) #endif #ifndef IOURINGINLINE #define IOURINGINLINE static inline #endif #ifdef __alpha__ /* * alpha and mips are the exceptions; all other architectures have * common numbers for new system calls.
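 * For example, on MIPS o32 (__NR_Linux == 4000) the definitions below
 * yield 4425/4426/4427, whereas the common numbers used by everything
 * else are the plain 425/426/427.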
*/ #ifndef __NR_io_uring_setup #define __NR_io_uring_setup 535 #endif #ifndef __NR_io_uring_enter #define __NR_io_uring_enter 536 #endif #ifndef __NR_io_uring_register #define __NR_io_uring_register 537 #endif #elif defined __mips__ #ifndef __NR_io_uring_setup #define __NR_io_uring_setup (__NR_Linux + 425) #endif #ifndef __NR_io_uring_enter #define __NR_io_uring_enter (__NR_Linux + 426) #endif #ifndef __NR_io_uring_register #define __NR_io_uring_register (__NR_Linux + 427) #endif #else /* !__alpha__ and !__mips__ */ #ifndef __NR_io_uring_setup #define __NR_io_uring_setup 425 #endif #ifndef __NR_io_uring_enter #define __NR_io_uring_enter 426 #endif #ifndef __NR_io_uring_register #define __NR_io_uring_register 427 #endif #endif #ifdef __cplusplus extern "C" { #endif /* * Library interface to io_uring */ struct io_uring_sq { unsigned *khead; unsigned *ktail; // Deprecated: use `ring_mask` instead of `*kring_mask` unsigned *kring_mask; // Deprecated: use `ring_entries` instead of `*kring_entries` unsigned *kring_entries; unsigned *kflags; unsigned *kdropped; unsigned *array; struct io_uring_sqe *sqes; unsigned sqe_head; unsigned sqe_tail; size_t ring_sz; void *ring_ptr; unsigned ring_mask; unsigned ring_entries; unsigned pad[2]; }; struct io_uring_cq { unsigned *khead; unsigned *ktail; // Deprecated: use `ring_mask` instead of `*kring_mask` unsigned *kring_mask; // Deprecated: use `ring_entries` instead of `*kring_entries` unsigned *kring_entries; unsigned *kflags; unsigned *koverflow; struct io_uring_cqe *cqes; size_t ring_sz; void *ring_ptr; unsigned ring_mask; unsigned ring_entries; unsigned pad[2]; }; struct io_uring { struct io_uring_sq sq; struct io_uring_cq cq; unsigned flags; int ring_fd; unsigned features; int enter_ring_fd; __u8 int_flags; __u8 pad[3]; unsigned pad2; }; /* * Library interface */ /* * return an allocated io_uring_probe structure, or NULL if probe fails (for * example, if it is not available). 
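 * A minimal usage sketch (assumes an initialized 'ring'; 'have_send_zc'
 * is a hypothetical application flag, error handling elided):
 *
 *	struct io_uring_probe *probe = io_uring_get_probe_ring(&ring);
 *
 *	if (probe && io_uring_opcode_supported(probe, IORING_OP_SEND_ZC))
 *		have_send_zc = true;
 *	io_uring_free_probe(probe);
 *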
The caller is responsible for freeing it */ struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring); /* same as io_uring_get_probe_ring, but takes care of ring init and teardown */ struct io_uring_probe *io_uring_get_probe(void); /* * frees a probe allocated through io_uring_get_probe() or * io_uring_get_probe_ring() */ void io_uring_free_probe(struct io_uring_probe *probe); IOURINGINLINE int io_uring_opcode_supported(const struct io_uring_probe *p, int op) { if (op > p->last_op) return 0; return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0; } int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring, struct io_uring_params *p, void *buf, size_t buf_size); int io_uring_queue_init_params(unsigned entries, struct io_uring *ring, struct io_uring_params *p); int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags); int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring); int io_uring_ring_dontfork(struct io_uring *ring); void io_uring_queue_exit(struct io_uring *ring); unsigned io_uring_peek_batch_cqe(struct io_uring *ring, struct io_uring_cqe **cqes, unsigned count); int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned wait_nr, struct __kernel_timespec *ts, sigset_t *sigmask); int io_uring_wait_cqe_timeout(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, struct __kernel_timespec *ts); int io_uring_submit(struct io_uring *ring); int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr); int io_uring_submit_and_wait_timeout(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned wait_nr, struct __kernel_timespec *ts, sigset_t *sigmask); int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs, unsigned nr_iovecs); int io_uring_register_buffers_tags(struct io_uring *ring, const struct iovec *iovecs, const __u64 *tags, unsigned nr); int io_uring_register_buffers_sparse(struct io_uring *ring, unsigned nr); int io_uring_register_buffers_update_tag(struct io_uring *ring, unsigned off, const struct iovec *iovecs, const __u64 *tags, unsigned nr); int io_uring_unregister_buffers(struct io_uring *ring); int io_uring_register_files(struct io_uring *ring, const int *files, unsigned nr_files); int io_uring_register_files_tags(struct io_uring *ring, const int *files, const __u64 *tags, unsigned nr); int io_uring_register_files_sparse(struct io_uring *ring, unsigned nr); int io_uring_register_files_update_tag(struct io_uring *ring, unsigned off, const int *files, const __u64 *tags, unsigned nr_files); int io_uring_unregister_files(struct io_uring *ring); int io_uring_register_files_update(struct io_uring *ring, unsigned off, const int *files, unsigned nr_files); int io_uring_register_eventfd(struct io_uring *ring, int fd); int io_uring_register_eventfd_async(struct io_uring *ring, int fd); int io_uring_unregister_eventfd(struct io_uring *ring); int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p, unsigned nr); int io_uring_register_personality(struct io_uring *ring); int io_uring_unregister_personality(struct io_uring *ring, int id); int io_uring_register_restrictions(struct io_uring *ring, struct io_uring_restriction *res, unsigned int nr_res); int io_uring_enable_rings(struct io_uring *ring); int __io_uring_sqring_wait(struct io_uring *ring); int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz, const cpu_set_t *mask); int io_uring_unregister_iowq_aff(struct io_uring *ring); int 
io_uring_register_iowq_max_workers(struct io_uring *ring, unsigned int *values); int io_uring_register_ring_fd(struct io_uring *ring); int io_uring_unregister_ring_fd(struct io_uring *ring); int io_uring_close_ring_fd(struct io_uring *ring); int io_uring_register_buf_ring(struct io_uring *ring, struct io_uring_buf_reg *reg, unsigned int flags); int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid); int io_uring_buf_ring_head(struct io_uring *ring, int buf_group, uint16_t *head); int io_uring_register_sync_cancel(struct io_uring *ring, struct io_uring_sync_cancel_reg *reg); int io_uring_register_file_alloc_range(struct io_uring *ring, unsigned off, unsigned len); int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi); int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi); int io_uring_get_events(struct io_uring *ring); int io_uring_submit_and_get_events(struct io_uring *ring); /* * io_uring syscalls. */ int io_uring_enter(unsigned int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig); int io_uring_enter2(unsigned int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig, size_t sz); int io_uring_setup(unsigned int entries, struct io_uring_params *p); int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg, unsigned int nr_args); /* * Mapped buffer ring alloc/register + unregister/free helpers */ struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *ring, unsigned int nentries, int bgid, unsigned int flags, int *ret); int io_uring_free_buf_ring(struct io_uring *ring, struct io_uring_buf_ring *br, unsigned int nentries, int bgid); /* * Helper for the peek/wait single cqe functions. Exported because of that, * but probably shouldn't be used directly in an application. */ int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned submit, unsigned wait_nr, sigset_t *sigmask); #define LIBURING_UDATA_TIMEOUT ((__u64) -1) /* * Calculates the step size for CQE iteration. * For standard CQEs it's 1; for big CQEs it's 2. */ #define io_uring_cqe_shift(ring) \ (!!((ring)->flags & IORING_SETUP_CQE32)) #define io_uring_cqe_index(ring,ptr,mask) \ (((ptr) & (mask)) << io_uring_cqe_shift(ring)) #define io_uring_for_each_cqe(ring, head, cqe) \ /* \ * io_uring_smp_load_acquire() enforces the order of tail \ * and CQE reads. \ */ \ for (head = *(ring)->cq.khead; \ (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \ &(ring)->cq.cqes[io_uring_cqe_index(ring, head, (ring)->cq.ring_mask)] : NULL)); \ head++) \ /* * Must be called after io_uring_for_each_cqe() */ IOURINGINLINE void io_uring_cq_advance(struct io_uring *ring, unsigned nr) { if (nr) { struct io_uring_cq *cq = &ring->cq; /* * Ensure that the kernel only sees the new value of the head * index after the CQEs have been read. */ io_uring_smp_store_release(cq->khead, *cq->khead + nr); } } /* * Must be called after io_uring_{peek,wait}_cqe() after the cqe has * been processed by the application. */ IOURINGINLINE void io_uring_cqe_seen(struct io_uring *ring, struct io_uring_cqe *cqe) { if (cqe) io_uring_cq_advance(ring, 1); } /* * Command prep helpers */ /* * Associate pointer @data with the sqe, for later retrieval from the cqe * at command completion time with io_uring_cqe_get_data().
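 *
 * Illustrative round-trip (a sketch; 'req' is an assumed application
 * object, error handling elided):
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	struct io_uring_cqe *cqe;
 *
 *	io_uring_prep_read(sqe, fd, buf, len, 0);
 *	io_uring_sqe_set_data(sqe, req);
 *	io_uring_submit(&ring);
 *	io_uring_wait_cqe(&ring, &cqe);
 *	req = io_uring_cqe_get_data(cqe);
 *	io_uring_cqe_seen(&ring, cqe);
 *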
*/ IOURINGINLINE void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data) { sqe->user_data = (unsigned long) data; } IOURINGINLINE void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe) { return (void *) (uintptr_t) cqe->user_data; } /* * Assign a 64-bit value to this sqe, which can get retrieved at completion * time with io_uring_cqe_get_data64. Just like the non-64 variants, except * these store a 64-bit type rather than a data pointer. */ IOURINGINLINE void io_uring_sqe_set_data64(struct io_uring_sqe *sqe, __u64 data) { sqe->user_data = data; } IOURINGINLINE __u64 io_uring_cqe_get_data64(const struct io_uring_cqe *cqe) { return cqe->user_data; } /* * Tell the app we have the 64-bit variants of the get/set userdata */ #define LIBURING_HAVE_DATA64 IOURINGINLINE void io_uring_sqe_set_flags(struct io_uring_sqe *sqe, unsigned flags) { sqe->flags = (__u8) flags; } IOURINGINLINE void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe, unsigned int file_index) { /* 0 means no fixed files, indexes should be encoded as "index + 1" */ sqe->file_index = file_index + 1; } IOURINGINLINE void io_uring_initialize_sqe(struct io_uring_sqe *sqe) { sqe->flags = 0; sqe->ioprio = 0; sqe->rw_flags = 0; sqe->buf_index = 0; sqe->personality = 0; sqe->file_index = 0; sqe->addr3 = 0; sqe->__pad2[0] = 0; } IOURINGINLINE void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, const void *addr, unsigned len, __u64 offset) { sqe->opcode = (__u8) op; sqe->fd = fd; sqe->off = offset; sqe->addr = (unsigned long) addr; sqe->len = len; } /* * io_uring_prep_splice() - Either @fd_in or @fd_out must be a pipe. * * - If @fd_in refers to a pipe, @off_in is ignored and must be set to -1. * * - If @fd_in does not refer to a pipe and @off_in is -1, then @nbytes are read * from @fd_in starting from the file offset, which is incremented by the * number of bytes read. * * - If @fd_in does not refer to a pipe and @off_in is not -1, then the starting * offset of @fd_in will be @off_in. * * This splice operation can be used to implement sendfile by splicing to an * intermediate pipe first, then splicing to the final destination. * In fact, the in-kernel implementation of sendfile uses splice internally. * * NOTE that even if fd_in or fd_out refers to a pipe, the splice operation * can still fail with EINVAL if one of the fds doesn't explicitly support the * splice operation, e.g. reading from a terminal is unsupported from kernel * 5.7 to 5.11. * Check issue #291 for more information.
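 *
 * A sendfile-style sketch using two linked splices through a pipe
 * (assumes 'pipefd' from pipe(2); length handling and errors elided):
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_splice(sqe, file_fd, 0, pipefd[1], -1, len, 0);
 *	sqe->flags |= IOSQE_IO_LINK;
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_splice(sqe, pipefd[0], -1, sock_fd, -1, len, 0);
 *	io_uring_submit(&ring);
 *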
*/ IOURINGINLINE void io_uring_prep_splice(struct io_uring_sqe *sqe, int fd_in, int64_t off_in, int fd_out, int64_t off_out, unsigned int nbytes, unsigned int splice_flags) { io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes, (__u64) off_out); sqe->splice_off_in = (__u64) off_in; sqe->splice_fd_in = fd_in; sqe->splice_flags = splice_flags; } IOURINGINLINE void io_uring_prep_tee(struct io_uring_sqe *sqe, int fd_in, int fd_out, unsigned int nbytes, unsigned int splice_flags) { io_uring_prep_rw(IORING_OP_TEE, sqe, fd_out, NULL, nbytes, 0); sqe->splice_off_in = 0; sqe->splice_fd_in = fd_in; sqe->splice_flags = splice_flags; } IOURINGINLINE void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd, const struct iovec *iovecs, unsigned nr_vecs, __u64 offset) { io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset); } IOURINGINLINE void io_uring_prep_readv2(struct io_uring_sqe *sqe, int fd, const struct iovec *iovecs, unsigned nr_vecs, __u64 offset, int flags) { io_uring_prep_readv(sqe, fd, iovecs, nr_vecs, offset); sqe->rw_flags = flags; } IOURINGINLINE void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd, void *buf, unsigned nbytes, __u64 offset, int buf_index) { io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset); sqe->buf_index = (__u16) buf_index; } IOURINGINLINE void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd, const struct iovec *iovecs, unsigned nr_vecs, __u64 offset) { io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset); } IOURINGINLINE void io_uring_prep_writev2(struct io_uring_sqe *sqe, int fd, const struct iovec *iovecs, unsigned nr_vecs, __u64 offset, int flags) { io_uring_prep_writev(sqe, fd, iovecs, nr_vecs, offset); sqe->rw_flags = flags; } IOURINGINLINE void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd, const void *buf, unsigned nbytes, __u64 offset, int buf_index) { io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset); sqe->buf_index = (__u16) buf_index; } IOURINGINLINE void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd, struct msghdr *msg, unsigned flags) { io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0); sqe->msg_flags = flags; } IOURINGINLINE void io_uring_prep_recvmsg_multishot(struct io_uring_sqe *sqe, int fd, struct msghdr *msg, unsigned flags) { io_uring_prep_recvmsg(sqe, fd, msg, flags); sqe->ioprio |= IORING_RECV_MULTISHOT; } IOURINGINLINE void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd, const struct msghdr *msg, unsigned flags) { io_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0); sqe->msg_flags = flags; } IOURINGINLINE unsigned __io_uring_prep_poll_mask(unsigned poll_mask) { #if __BYTE_ORDER == __BIG_ENDIAN poll_mask = __swahw32(poll_mask); #endif return poll_mask; } IOURINGINLINE void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd, unsigned poll_mask) { io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0); sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask); } IOURINGINLINE void io_uring_prep_poll_multishot(struct io_uring_sqe *sqe, int fd, unsigned poll_mask) { io_uring_prep_poll_add(sqe, fd, poll_mask); sqe->len = IORING_POLL_ADD_MULTI; } IOURINGINLINE void io_uring_prep_poll_remove(struct io_uring_sqe *sqe, __u64 user_data) { io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, 0, 0); sqe->addr = user_data; } IOURINGINLINE void io_uring_prep_poll_update(struct io_uring_sqe *sqe, __u64 old_user_data, __u64 new_user_data, unsigned poll_mask, unsigned flags) { io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, 
-1, NULL, flags, new_user_data); sqe->addr = old_user_data; sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask); } IOURINGINLINE void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd, unsigned fsync_flags) { io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0); sqe->fsync_flags = fsync_flags; } IOURINGINLINE void io_uring_prep_nop(struct io_uring_sqe *sqe) { io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0); } IOURINGINLINE void io_uring_prep_timeout(struct io_uring_sqe *sqe, struct __kernel_timespec *ts, unsigned count, unsigned flags) { io_uring_prep_rw(IORING_OP_TIMEOUT, sqe, -1, ts, 1, count); sqe->timeout_flags = flags; } IOURINGINLINE void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe, __u64 user_data, unsigned flags) { io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0, 0); sqe->addr = user_data; sqe->timeout_flags = flags; } IOURINGINLINE void io_uring_prep_timeout_update(struct io_uring_sqe *sqe, struct __kernel_timespec *ts, __u64 user_data, unsigned flags) { io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0, (uintptr_t) ts); sqe->addr = user_data; sqe->timeout_flags = flags | IORING_TIMEOUT_UPDATE; } IOURINGINLINE void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd, struct sockaddr *addr, socklen_t *addrlen, int flags) { io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0, (__u64) (unsigned long) addrlen); sqe->accept_flags = (__u32) flags; } /* accept directly into the fixed file table */ IOURINGINLINE void io_uring_prep_accept_direct(struct io_uring_sqe *sqe, int fd, struct sockaddr *addr, socklen_t *addrlen, int flags, unsigned int file_index) { io_uring_prep_accept(sqe, fd, addr, addrlen, flags); /* offset by 1 for allocation */ if (file_index == IORING_FILE_INDEX_ALLOC) file_index--; __io_uring_set_target_fixed_file(sqe, file_index); } IOURINGINLINE void io_uring_prep_multishot_accept(struct io_uring_sqe *sqe, int fd, struct sockaddr *addr, socklen_t *addrlen, int flags) { io_uring_prep_accept(sqe, fd, addr, addrlen, flags); sqe->ioprio |= IORING_ACCEPT_MULTISHOT; } /* multishot accept directly into the fixed file table */ IOURINGINLINE void io_uring_prep_multishot_accept_direct(struct io_uring_sqe *sqe, int fd, struct sockaddr *addr, socklen_t *addrlen, int flags) { io_uring_prep_multishot_accept(sqe, fd, addr, addrlen, flags); __io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1); } IOURINGINLINE void io_uring_prep_cancel64(struct io_uring_sqe *sqe, __u64 user_data, int flags) { io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, NULL, 0, 0); sqe->addr = user_data; sqe->cancel_flags = (__u32) flags; } IOURINGINLINE void io_uring_prep_cancel(struct io_uring_sqe *sqe, void *user_data, int flags) { io_uring_prep_cancel64(sqe, (__u64) (uintptr_t) user_data, flags); } IOURINGINLINE void io_uring_prep_cancel_fd(struct io_uring_sqe *sqe, int fd, unsigned int flags) { io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, fd, NULL, 0, 0); sqe->cancel_flags = (__u32) flags | IORING_ASYNC_CANCEL_FD; } IOURINGINLINE void io_uring_prep_link_timeout(struct io_uring_sqe *sqe, struct __kernel_timespec *ts, unsigned flags) { io_uring_prep_rw(IORING_OP_LINK_TIMEOUT, sqe, -1, ts, 1, 0); sqe->timeout_flags = flags; } IOURINGINLINE void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd, const struct sockaddr *addr, socklen_t addrlen) { io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen); } IOURINGINLINE void io_uring_prep_files_update(struct io_uring_sqe *sqe, int *fds, unsigned nr_fds, int offset) { 
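	/*
	 * The fd field is unused (-1); the starting index in the fixed file
	 * table travels in the sqe offset field, so files[offset] onward are
	 * replaced by the nr_fds descriptors in 'fds'.
	 */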
io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds, (__u64) offset); } IOURINGINLINE void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd, int mode, __u64 offset, __u64 len) { io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd, 0, (unsigned int) mode, (__u64) offset); sqe->addr = (__u64) len; } IOURINGINLINE void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd, const char *path, int flags, mode_t mode) { io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0); sqe->open_flags = (__u32) flags; } /* open directly into the fixed file table */ IOURINGINLINE void io_uring_prep_openat_direct(struct io_uring_sqe *sqe, int dfd, const char *path, int flags, mode_t mode, unsigned file_index) { io_uring_prep_openat(sqe, dfd, path, flags, mode); /* offset by 1 for allocation */ if (file_index == IORING_FILE_INDEX_ALLOC) file_index--; __io_uring_set_target_fixed_file(sqe, file_index); } IOURINGINLINE void io_uring_prep_close(struct io_uring_sqe *sqe, int fd) { io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0); } IOURINGINLINE void io_uring_prep_close_direct(struct io_uring_sqe *sqe, unsigned file_index) { io_uring_prep_close(sqe, 0); __io_uring_set_target_fixed_file(sqe, file_index); } IOURINGINLINE void io_uring_prep_read(struct io_uring_sqe *sqe, int fd, void *buf, unsigned nbytes, __u64 offset) { io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset); } IOURINGINLINE void io_uring_prep_read_multishot(struct io_uring_sqe *sqe, int fd, unsigned nbytes, __u64 offset, int buf_group) { io_uring_prep_rw(IORING_OP_READ_MULTISHOT, sqe, fd, NULL, nbytes, offset); sqe->buf_group = buf_group; } IOURINGINLINE void io_uring_prep_write(struct io_uring_sqe *sqe, int fd, const void *buf, unsigned nbytes, __u64 offset) { io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset); } struct statx; IOURINGINLINE void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd, const char *path, int flags, unsigned mask, struct statx *statxbuf) { io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask, (__u64) (unsigned long) statxbuf); sqe->statx_flags = (__u32) flags; } IOURINGINLINE void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd, __u64 offset, off_t len, int advice) { io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, (__u32) len, offset); sqe->fadvise_advice = (__u32) advice; } IOURINGINLINE void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr, off_t length, int advice) { io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, (__u32) length, 0); sqe->fadvise_advice = (__u32) advice; } IOURINGINLINE void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, const void *buf, size_t len, int flags) { io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, (__u32) len, 0); sqe->msg_flags = (__u32) flags; } IOURINGINLINE void io_uring_prep_send_set_addr(struct io_uring_sqe *sqe, const struct sockaddr *dest_addr, __u16 addr_len) { sqe->addr2 = (unsigned long)(const void *)dest_addr; sqe->addr_len = addr_len; } IOURINGINLINE void io_uring_prep_sendto(struct io_uring_sqe *sqe, int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *addr, socklen_t addrlen) { io_uring_prep_send(sqe, sockfd, buf, len, flags); io_uring_prep_send_set_addr(sqe, addr, addrlen); } IOURINGINLINE void io_uring_prep_send_zc(struct io_uring_sqe *sqe, int sockfd, const void *buf, size_t len, int flags, unsigned zc_flags) { io_uring_prep_rw(IORING_OP_SEND_ZC, sqe, sockfd, buf, (__u32) len, 0); sqe->msg_flags = (__u32) flags; sqe->ioprio = zc_flags; } IOURINGINLINE void 
io_uring_prep_send_zc_fixed(struct io_uring_sqe *sqe, int sockfd, const void *buf, size_t len, int flags, unsigned zc_flags, unsigned buf_index) { io_uring_prep_send_zc(sqe, sockfd, buf, len, flags, zc_flags); sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; sqe->buf_index = buf_index; } IOURINGINLINE void io_uring_prep_sendmsg_zc(struct io_uring_sqe *sqe, int fd, const struct msghdr *msg, unsigned flags) { io_uring_prep_sendmsg(sqe, fd, msg, flags); sqe->opcode = IORING_OP_SENDMSG_ZC; } IOURINGINLINE void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd, void *buf, size_t len, int flags) { io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, (__u32) len, 0); sqe->msg_flags = (__u32) flags; } IOURINGINLINE void io_uring_prep_recv_multishot(struct io_uring_sqe *sqe, int sockfd, void *buf, size_t len, int flags) { io_uring_prep_recv(sqe, sockfd, buf, len, flags); sqe->ioprio |= IORING_RECV_MULTISHOT; } IOURINGINLINE struct io_uring_recvmsg_out * io_uring_recvmsg_validate(void *buf, int buf_len, struct msghdr *msgh) { unsigned long header = msgh->msg_controllen + msgh->msg_namelen + sizeof(struct io_uring_recvmsg_out); if (buf_len < 0 || (unsigned long)buf_len < header) return NULL; return (struct io_uring_recvmsg_out *)buf; } IOURINGINLINE void *io_uring_recvmsg_name(struct io_uring_recvmsg_out *o) { return (void *) &o[1]; } IOURINGINLINE struct cmsghdr * io_uring_recvmsg_cmsg_firsthdr(struct io_uring_recvmsg_out *o, struct msghdr *msgh) { if (o->controllen < sizeof(struct cmsghdr)) return NULL; return (struct cmsghdr *)((unsigned char *) io_uring_recvmsg_name(o) + msgh->msg_namelen); } IOURINGINLINE struct cmsghdr * io_uring_recvmsg_cmsg_nexthdr(struct io_uring_recvmsg_out *o, struct msghdr *msgh, struct cmsghdr *cmsg) { unsigned char *end; if (cmsg->cmsg_len < sizeof(struct cmsghdr)) return NULL; end = (unsigned char *) io_uring_recvmsg_cmsg_firsthdr(o, msgh) + o->controllen; cmsg = (struct cmsghdr *)((unsigned char *) cmsg + CMSG_ALIGN(cmsg->cmsg_len)); if ((unsigned char *) (cmsg + 1) > end) return NULL; if (((unsigned char *) cmsg) + CMSG_ALIGN(cmsg->cmsg_len) > end) return NULL; return cmsg; } IOURINGINLINE void *io_uring_recvmsg_payload(struct io_uring_recvmsg_out *o, struct msghdr *msgh) { return (void *)((unsigned char *)io_uring_recvmsg_name(o) + msgh->msg_namelen + msgh->msg_controllen); } IOURINGINLINE unsigned int io_uring_recvmsg_payload_length(struct io_uring_recvmsg_out *o, int buf_len, struct msghdr *msgh) { unsigned long payload_start, payload_end; payload_start = (unsigned long) io_uring_recvmsg_payload(o, msgh); payload_end = (unsigned long) o + buf_len; return (unsigned int) (payload_end - payload_start); } IOURINGINLINE void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd, const char *path, struct open_how *how) { io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how), (uint64_t) (uintptr_t) how); } /* open directly into the fixed file table */ IOURINGINLINE void io_uring_prep_openat2_direct(struct io_uring_sqe *sqe, int dfd, const char *path, struct open_how *how, unsigned file_index) { io_uring_prep_openat2(sqe, dfd, path, how); /* offset by 1 for allocation */ if (file_index == IORING_FILE_INDEX_ALLOC) file_index--; __io_uring_set_target_fixed_file(sqe, file_index); } struct epoll_event; IOURINGINLINE void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd, int fd, int op, struct epoll_event *ev) { io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev, (__u32) op, (__u32) fd); } IOURINGINLINE void io_uring_prep_provide_buffers(struct 
io_uring_sqe *sqe, void *addr, int len, int nr, int bgid, int bid) { io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, (__u32) len, (__u64) bid); sqe->buf_group = (__u16) bgid; } IOURINGINLINE void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe, int nr, int bgid) { io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0); sqe->buf_group = (__u16) bgid; } IOURINGINLINE void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd, int how) { io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, (__u32) how, 0); } IOURINGINLINE void io_uring_prep_unlinkat(struct io_uring_sqe *sqe, int dfd, const char *path, int flags) { io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, path, 0, 0); sqe->unlink_flags = (__u32) flags; } IOURINGINLINE void io_uring_prep_unlink(struct io_uring_sqe *sqe, const char *path, int flags) { io_uring_prep_unlinkat(sqe, AT_FDCWD, path, flags); } IOURINGINLINE void io_uring_prep_renameat(struct io_uring_sqe *sqe, int olddfd, const char *oldpath, int newdfd, const char *newpath, unsigned int flags) { io_uring_prep_rw(IORING_OP_RENAMEAT, sqe, olddfd, oldpath, (__u32) newdfd, (uint64_t) (uintptr_t) newpath); sqe->rename_flags = (__u32) flags; } IOURINGINLINE void io_uring_prep_rename(struct io_uring_sqe *sqe, const char *oldpath, const char *newpath) { io_uring_prep_renameat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, 0); } IOURINGINLINE void io_uring_prep_sync_file_range(struct io_uring_sqe *sqe, int fd, unsigned len, __u64 offset, int flags) { io_uring_prep_rw(IORING_OP_SYNC_FILE_RANGE, sqe, fd, NULL, len, offset); sqe->sync_range_flags = (__u32) flags; } IOURINGINLINE void io_uring_prep_mkdirat(struct io_uring_sqe *sqe, int dfd, const char *path, mode_t mode) { io_uring_prep_rw(IORING_OP_MKDIRAT, sqe, dfd, path, mode, 0); } IOURINGINLINE void io_uring_prep_mkdir(struct io_uring_sqe *sqe, const char *path, mode_t mode) { io_uring_prep_mkdirat(sqe, AT_FDCWD, path, mode); } IOURINGINLINE void io_uring_prep_symlinkat(struct io_uring_sqe *sqe, const char *target, int newdirfd, const char *linkpath) { io_uring_prep_rw(IORING_OP_SYMLINKAT, sqe, newdirfd, target, 0, (uint64_t) (uintptr_t) linkpath); } IOURINGINLINE void io_uring_prep_symlink(struct io_uring_sqe *sqe, const char *target, const char *linkpath) { io_uring_prep_symlinkat(sqe, target, AT_FDCWD, linkpath); } IOURINGINLINE void io_uring_prep_linkat(struct io_uring_sqe *sqe, int olddfd, const char *oldpath, int newdfd, const char *newpath, int flags) { io_uring_prep_rw(IORING_OP_LINKAT, sqe, olddfd, oldpath, (__u32) newdfd, (uint64_t) (uintptr_t) newpath); sqe->hardlink_flags = (__u32) flags; } IOURINGINLINE void io_uring_prep_link(struct io_uring_sqe *sqe, const char *oldpath, const char *newpath, int flags) { io_uring_prep_linkat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, flags); } IOURINGINLINE void io_uring_prep_msg_ring_cqe_flags(struct io_uring_sqe *sqe, int fd, unsigned int len, __u64 data, unsigned int flags, unsigned int cqe_flags) { io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data); sqe->msg_ring_flags = IORING_MSG_RING_FLAGS_PASS | flags; sqe->file_index = cqe_flags; } IOURINGINLINE void io_uring_prep_msg_ring(struct io_uring_sqe *sqe, int fd, unsigned int len, __u64 data, unsigned int flags) { io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data); sqe->msg_ring_flags = flags; } IOURINGINLINE void io_uring_prep_msg_ring_fd(struct io_uring_sqe *sqe, int fd, int source_fd, int target_fd, __u64 data, unsigned int flags) { io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, (void *) 
(uintptr_t) IORING_MSG_SEND_FD, 0, data); sqe->addr3 = source_fd; /* offset by 1 for allocation */ if ((unsigned int) target_fd == IORING_FILE_INDEX_ALLOC) target_fd--; __io_uring_set_target_fixed_file(sqe, target_fd); sqe->msg_ring_flags = flags; } IOURINGINLINE void io_uring_prep_msg_ring_fd_alloc(struct io_uring_sqe *sqe, int fd, int source_fd, __u64 data, unsigned int flags) { io_uring_prep_msg_ring_fd(sqe, fd, source_fd, IORING_FILE_INDEX_ALLOC, data, flags); } IOURINGINLINE void io_uring_prep_getxattr(struct io_uring_sqe *sqe, const char *name, char *value, const char *path, unsigned int len) { io_uring_prep_rw(IORING_OP_GETXATTR, sqe, 0, name, len, (__u64) (uintptr_t) value); sqe->addr3 = (__u64) (uintptr_t) path; sqe->xattr_flags = 0; } IOURINGINLINE void io_uring_prep_setxattr(struct io_uring_sqe *sqe, const char *name, const char *value, const char *path, int flags, unsigned int len) { io_uring_prep_rw(IORING_OP_SETXATTR, sqe, 0, name, len, (__u64) (uintptr_t) value); sqe->addr3 = (__u64) (uintptr_t) path; sqe->xattr_flags = flags; } IOURINGINLINE void io_uring_prep_fgetxattr(struct io_uring_sqe *sqe, int fd, const char *name, char *value, unsigned int len) { io_uring_prep_rw(IORING_OP_FGETXATTR, sqe, fd, name, len, (__u64) (uintptr_t) value); sqe->xattr_flags = 0; } IOURINGINLINE void io_uring_prep_fsetxattr(struct io_uring_sqe *sqe, int fd, const char *name, const char *value, int flags, unsigned int len) { io_uring_prep_rw(IORING_OP_FSETXATTR, sqe, fd, name, len, (__u64) (uintptr_t) value); sqe->xattr_flags = flags; } IOURINGINLINE void io_uring_prep_socket(struct io_uring_sqe *sqe, int domain, int type, int protocol, unsigned int flags) { io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type); sqe->rw_flags = flags; } IOURINGINLINE void io_uring_prep_socket_direct(struct io_uring_sqe *sqe, int domain, int type, int protocol, unsigned file_index, unsigned int flags) { io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type); sqe->rw_flags = flags; /* offset by 1 for allocation */ if (file_index == IORING_FILE_INDEX_ALLOC) file_index--; __io_uring_set_target_fixed_file(sqe, file_index); } IOURINGINLINE void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe, int domain, int type, int protocol, unsigned int flags) { io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type); sqe->rw_flags = flags; __io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1); } /* * Prepare commands for sockets */ IOURINGINLINE void io_uring_prep_cmd_sock(struct io_uring_sqe *sqe, int cmd_op, int fd, int level, int optname, void *optval, int optlen) { io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, NULL, 0, 0); sqe->optval = (unsigned long) (uintptr_t) optval; sqe->optname = optname; sqe->optlen = optlen; sqe->cmd_op = cmd_op; sqe->level = level; } IOURINGINLINE void io_uring_prep_waitid(struct io_uring_sqe *sqe, idtype_t idtype, id_t id, siginfo_t *infop, int options, unsigned int flags) { io_uring_prep_rw(IORING_OP_WAITID, sqe, id, NULL, (unsigned) idtype, 0); sqe->waitid_flags = flags; sqe->file_index = options; sqe->addr2 = (unsigned long) infop; } IOURINGINLINE void io_uring_prep_futex_wake(struct io_uring_sqe *sqe, uint32_t *futex, uint64_t val, uint64_t mask, uint32_t futex_flags, unsigned int flags) { io_uring_prep_rw(IORING_OP_FUTEX_WAKE, sqe, futex_flags, futex, 0, val); sqe->futex_flags = flags; sqe->addr3 = mask; } IOURINGINLINE void io_uring_prep_futex_wait(struct io_uring_sqe *sqe, uint32_t *futex, uint64_t val, uint64_t mask, 
uint32_t futex_flags, unsigned int flags) { io_uring_prep_rw(IORING_OP_FUTEX_WAIT, sqe, futex_flags, futex, 0, val); sqe->futex_flags = flags; sqe->addr3 = mask; } struct futex_waitv; IOURINGINLINE void io_uring_prep_futex_waitv(struct io_uring_sqe *sqe, struct futex_waitv *futex, uint32_t nr_futex, unsigned int flags) { io_uring_prep_rw(IORING_OP_FUTEX_WAITV, sqe, 0, futex, nr_futex, 0); sqe->futex_flags = flags; } IOURINGINLINE void io_uring_prep_fixed_fd_install(struct io_uring_sqe *sqe, int fd, unsigned int flags) { io_uring_prep_rw(IORING_OP_FIXED_FD_INSTALL, sqe, fd, NULL, 0, 0); sqe->flags = IOSQE_FIXED_FILE; sqe->install_fd_flags = flags; } IOURINGINLINE void io_uring_prep_ftruncate(struct io_uring_sqe *sqe, int fd, loff_t len) { io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, 0, 0, len); } /* * Returns the number of unconsumed (if SQPOLL) or unsubmitted entries that * exist in the SQ ring */ IOURINGINLINE unsigned io_uring_sq_ready(const struct io_uring *ring) { unsigned khead; /* * Without a barrier, we could miss an update and think the SQ wasn't * ready. We don't need the load acquire for non-SQPOLL since then we * drive updates. */ if (ring->flags & IORING_SETUP_SQPOLL) khead = io_uring_smp_load_acquire(ring->sq.khead); else khead = *ring->sq.khead; /* always use real head, to avoid losing sync for short submit */ return ring->sq.sqe_tail - khead; } /* * Returns how much space is left in the SQ ring. */ IOURINGINLINE unsigned io_uring_sq_space_left(const struct io_uring *ring) { return ring->sq.ring_entries - io_uring_sq_ready(ring); } /* * Only applicable when using SQPOLL - allows the caller to wait for space * to free up in the SQ ring, which happens when the kernel side thread has * consumed one or more entries. If the SQ ring is currently non-full, no * action is taken. Note: may return -EINVAL if the kernel doesn't support * this feature. */ IOURINGINLINE int io_uring_sqring_wait(struct io_uring *ring) { if (!(ring->flags & IORING_SETUP_SQPOLL)) return 0; if (io_uring_sq_space_left(ring)) return 0; return __io_uring_sqring_wait(ring); } /* * Returns how many unconsumed entries are ready in the CQ ring */ IOURINGINLINE unsigned io_uring_cq_ready(const struct io_uring *ring) { return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead; } /* * Returns true if there are overflow entries waiting to be flushed onto * the CQ ring */ IOURINGINLINE bool io_uring_cq_has_overflow(const struct io_uring *ring) { return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW; } /* * Returns true if the eventfd notification is currently enabled */ IOURINGINLINE bool io_uring_cq_eventfd_enabled(const struct io_uring *ring) { if (!ring->cq.kflags) return true; return !(*ring->cq.kflags & IORING_CQ_EVENTFD_DISABLED); } /* * Toggle eventfd notification on or off, if an eventfd is registered with * the ring. */ IOURINGINLINE int io_uring_cq_eventfd_toggle(struct io_uring *ring, bool enabled) { uint32_t flags; if (!!enabled == io_uring_cq_eventfd_enabled(ring)) return 0; if (!ring->cq.kflags) return -EOPNOTSUPP; flags = *ring->cq.kflags; if (enabled) flags &= ~IORING_CQ_EVENTFD_DISABLED; else flags |= IORING_CQ_EVENTFD_DISABLED; IO_URING_WRITE_ONCE(*ring->cq.kflags, flags); return 0; } /* * Return an IO completion, waiting for 'wait_nr' completions if one isn't * readily available. Returns 0 with cqe_ptr filled in on success, -errno on * failure.
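 *
 * A typical batched-reap sketch (handle_cqe() is a hypothetical
 * consumer; error handling elided):
 *
 *	struct io_uring_cqe *cqe;
 *	unsigned head, seen = 0;
 *
 *	if (!io_uring_wait_cqe_nr(&ring, &cqe, 8)) {
 *		io_uring_for_each_cqe(&ring, head, cqe) {
 *			handle_cqe(cqe);
 *			seen++;
 *		}
 *		io_uring_cq_advance(&ring, seen);
 *	}
 *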
*/ IOURINGINLINE int io_uring_wait_cqe_nr(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned wait_nr) { return __io_uring_get_cqe(ring, cqe_ptr, 0, wait_nr, NULL); } /* * Internal helper, don't use directly in applications. Use one of the * "official" versions of this, io_uring_peek_cqe(), io_uring_wait_cqe(), * or io_uring_wait_cqes*(). */ IOURINGINLINE int __io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned *nr_available) { struct io_uring_cqe *cqe; int err = 0; unsigned available; unsigned mask = ring->cq.ring_mask; int shift = 0; if (ring->flags & IORING_SETUP_CQE32) shift = 1; do { unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail); unsigned head = *ring->cq.khead; cqe = NULL; available = tail - head; if (!available) break; cqe = &ring->cq.cqes[(head & mask) << shift]; if (!(ring->features & IORING_FEAT_EXT_ARG) && cqe->user_data == LIBURING_UDATA_TIMEOUT) { if (cqe->res < 0) err = cqe->res; io_uring_cq_advance(ring, 1); if (!err) continue; cqe = NULL; } break; } while (1); *cqe_ptr = cqe; if (nr_available) *nr_available = available; return err; } /* * Return an IO completion, if one is readily available. Returns 0 with * cqe_ptr filled in on success, -errno on failure. */ IOURINGINLINE int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr) { if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr) return 0; return io_uring_wait_cqe_nr(ring, cqe_ptr, 0); } /* * Return an IO completion, waiting for it if necessary. Returns 0 with * cqe_ptr filled in on success, -errno on failure. */ IOURINGINLINE int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr) { if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr) return 0; return io_uring_wait_cqe_nr(ring, cqe_ptr, 1); } /* * Return an sqe to fill. Application must later call io_uring_submit() * when it's ready to tell the kernel about it. The caller may call this * function multiple times before calling io_uring_submit(). * * Returns a vacant sqe, or NULL if we're full. */ IOURINGINLINE struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring) { struct io_uring_sq *sq = &ring->sq; unsigned int head, next = sq->sqe_tail + 1; int shift = 0; if (ring->flags & IORING_SETUP_SQE128) shift = 1; if (!(ring->flags & IORING_SETUP_SQPOLL)) head = *sq->khead; else head = io_uring_smp_load_acquire(sq->khead); if (next - head <= sq->ring_entries) { struct io_uring_sqe *sqe; sqe = &sq->sqes[(sq->sqe_tail & sq->ring_mask) << shift]; sq->sqe_tail = next; io_uring_initialize_sqe(sqe); return sqe; } return NULL; } /* * Return the appropriate mask for a buffer ring of size 'ring_entries' */ IOURINGINLINE int io_uring_buf_ring_mask(__u32 ring_entries) { return ring_entries - 1; } IOURINGINLINE void io_uring_buf_ring_init(struct io_uring_buf_ring *br) { br->tail = 0; } /* * Assign 'buf' with the addr/len/buffer ID supplied */ IOURINGINLINE void io_uring_buf_ring_add(struct io_uring_buf_ring *br, void *addr, unsigned int len, unsigned short bid, int mask, int buf_offset) { struct io_uring_buf *buf = &br->bufs[(br->tail + buf_offset) & mask]; buf->addr = (unsigned long) (uintptr_t) addr; buf->len = len; buf->bid = bid; } /* * Make 'count' new buffers visible to the kernel. Called after * io_uring_buf_ring_add() has been called 'count' times to fill in new * buffers. 
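 *
 * Fill-and-publish sketch (assumes 'br' came from io_uring_setup_buf_ring()
 * with 'nbufs' entries and per-buffer storage in bufs[]; errors elided):
 *
 *	int mask = io_uring_buf_ring_mask(nbufs);
 *	int i;
 *
 *	for (i = 0; i < nbufs; i++)
 *		io_uring_buf_ring_add(br, bufs[i], buf_len, i, mask, i);
 *	io_uring_buf_ring_advance(br, nbufs);
 *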
*/ IOURINGINLINE void io_uring_buf_ring_advance(struct io_uring_buf_ring *br, int count) { unsigned short new_tail = br->tail + count; io_uring_smp_store_release(&br->tail, new_tail); } IOURINGINLINE void __io_uring_buf_ring_cq_advance(struct io_uring *ring, struct io_uring_buf_ring *br, int cq_count, int buf_count) { io_uring_buf_ring_advance(br, buf_count); io_uring_cq_advance(ring, cq_count); } /* * Make 'count' new buffers visible to the kernel while at the same time * advancing the CQ ring seen entries. This can be used when the application * is using ring provided buffers and returns buffers while processing CQEs, * avoiding an extra atomic when needing to increment both the CQ ring and * the ring buffer index at the same time. */ IOURINGINLINE void io_uring_buf_ring_cq_advance(struct io_uring *ring, struct io_uring_buf_ring *br, int count) { __io_uring_buf_ring_cq_advance(ring, br, count, count); } IOURINGINLINE int io_uring_buf_ring_available(struct io_uring *ring, struct io_uring_buf_ring *br, unsigned short bgid) { uint16_t head; int ret; ret = io_uring_buf_ring_head(ring, bgid, &head); if (ret) return ret; return (uint16_t) (br->tail - head); } #ifndef LIBURING_INTERNAL IOURINGINLINE struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) { return _io_uring_get_sqe(ring); } #else struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring); #endif ssize_t io_uring_mlock_size(unsigned entries, unsigned flags); ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p); /* * Versioning information for liburing. * * Use IO_URING_CHECK_VERSION() for compile time checks including from * preprocessor directives. * * Use io_uring_check_version() for runtime checks of the version of * liburing that was loaded by the dynamic linker. */ int io_uring_major_version(void); int io_uring_minor_version(void); bool io_uring_check_version(int major, int minor); #define IO_URING_CHECK_VERSION(major,minor) \ (major > IO_URING_VERSION_MAJOR || \ (major == IO_URING_VERSION_MAJOR && \ minor > IO_URING_VERSION_MINOR)) #ifdef __cplusplus } #endif #ifdef IOURINGINLINE #undef IOURINGINLINE #endif #endif liburing-2.6/src/include/liburing/000077500000000000000000000000001461424365000172265ustar00rootroot00000000000000liburing-2.6/src/include/liburing/barrier.h000066400000000000000000000046771461424365000210430ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_BARRIER_H #define LIBURING_BARRIER_H /* From the kernel documentation file refcount-vs-atomic.rst: A RELEASE memory ordering guarantees that all prior loads and stores (all po-earlier instructions) on the same CPU are completed before the operation. It also guarantees that all po-earlier stores on the same CPU and all propagated stores from other CPUs must propagate to all other CPUs before the release operation (A-cumulative property). This is implemented using :c:func:`smp_store_release`. An ACQUIRE memory ordering guarantees that all post loads and stores (all po-later instructions) on the same CPU are completed after the acquire operation. It also guarantees that all po-later stores on the same CPU must propagate to all other CPUs after the acquire operation executes. This is implemented using :c:func:`smp_acquire__after_ctrl_dep`. 
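 *
 * In liburing these orderings are applied to the shared ring indices:
 * the producing side publishes a new tail with io_uring_smp_store_release()
 * and the consuming side observes it with io_uring_smp_load_acquire(),
 * e.g. (sketch):
 *
 *	unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail);
 *	// all CQE stores made before the paired store-release are visible
 *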
*/ #ifdef __cplusplus #include <atomic> template <typename T> static inline void IO_URING_WRITE_ONCE(T &var, T val) { std::atomic_store_explicit(reinterpret_cast<std::atomic<T> *>(&var), val, std::memory_order_relaxed); } template <typename T> static inline T IO_URING_READ_ONCE(const T &var) { return std::atomic_load_explicit( reinterpret_cast<const std::atomic<T> *>(&var), std::memory_order_relaxed); } template <typename T> static inline void io_uring_smp_store_release(T *p, T v) { std::atomic_store_explicit(reinterpret_cast<std::atomic<T> *>(p), v, std::memory_order_release); } template <typename T> static inline T io_uring_smp_load_acquire(const T *p) { return std::atomic_load_explicit( reinterpret_cast<const std::atomic<T> *>(p), std::memory_order_acquire); } static inline void io_uring_smp_mb() { std::atomic_thread_fence(std::memory_order_seq_cst); } #else #include <stdatomic.h> #define IO_URING_WRITE_ONCE(var, val) \ atomic_store_explicit((_Atomic __typeof__(var) *)&(var), \ (val), memory_order_relaxed) #define IO_URING_READ_ONCE(var) \ atomic_load_explicit((_Atomic __typeof__(var) *)&(var), \ memory_order_relaxed) #define io_uring_smp_store_release(p, v) \ atomic_store_explicit((_Atomic __typeof__(*(p)) *)(p), (v), \ memory_order_release) #define io_uring_smp_load_acquire(p) \ atomic_load_explicit((_Atomic __typeof__(*(p)) *)(p), \ memory_order_acquire) #define io_uring_smp_mb() \ atomic_thread_fence(memory_order_seq_cst) #endif #endif /* defined(LIBURING_BARRIER_H) */ liburing-2.6/src/include/liburing/io_uring.h000066400000000000000000000462361461424365000212250ustar00rootroot00000000000000/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ /* * Header file for the io_uring interface. * * Copyright (C) 2019 Jens Axboe * Copyright (C) 2019 Christoph Hellwig */ #ifndef LINUX_IO_URING_H #define LINUX_IO_URING_H #include <linux/fs.h> #include <linux/types.h> /* * this file is shared with liburing and that has to autodetect * if linux/time_types.h is available or not, it can * define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H * if linux/time_types.h is not available */ #ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H #include <linux/time_types.h> #endif #ifdef __cplusplus extern "C" { #endif /* * IO submission data structure (Submission Queue Entry) */ struct io_uring_sqe { __u8 opcode; /* type of operation for this sqe */ __u8 flags; /* IOSQE_ flags */ __u16 ioprio; /* ioprio for the request */ __s32 fd; /* file descriptor to do IO on */ union { __u64 off; /* offset into file */ __u64 addr2; struct { __u32 cmd_op; __u32 __pad1; }; }; union { __u64 addr; /* pointer to buffer or iovecs */ __u64 splice_off_in; struct { __u32 level; __u32 optname; }; }; __u32 len; /* buffer size or number of iovecs */ union { __kernel_rwf_t rw_flags; __u32 fsync_flags; __u16 poll_events; /* compatibility */ __u32 poll32_events; /* word-reversed for BE */ __u32 sync_range_flags; __u32 msg_flags; __u32 timeout_flags; __u32 accept_flags; __u32 cancel_flags; __u32 open_flags; __u32 statx_flags; __u32 fadvise_advice; __u32 splice_flags; __u32 rename_flags; __u32 unlink_flags; __u32 hardlink_flags; __u32 xattr_flags; __u32 msg_ring_flags; __u32 uring_cmd_flags; __u32 waitid_flags; __u32 futex_flags; __u32 install_fd_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ union { /* index into fixed buffers, if used */ __u16 buf_index; /* for grouped buffer selection */ __u16 buf_group; } __attribute__((packed)); /* personality to use, if used */ __u16 personality; union { __s32 splice_fd_in; __u32 file_index; __u32 optlen; struct { __u16 addr_len; __u16 __pad3[1]; }; }; union { struct { __u64 addr3; __u64
__pad2[1]; }; __u64 optval; /* * If the ring is initialized with IORING_SETUP_SQE128, then * this field is used for 80 bytes of arbitrary command data */ __u8 cmd[0]; }; }; /* * If sqe->file_index is set to this for opcodes that instantiate a new * direct descriptor (like openat/openat2/accept), then io_uring will allocate * an available direct descriptor instead of having the application pass one * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE * if the space is full. */ #define IORING_FILE_INDEX_ALLOC (~0U) enum { IOSQE_FIXED_FILE_BIT, IOSQE_IO_DRAIN_BIT, IOSQE_IO_LINK_BIT, IOSQE_IO_HARDLINK_BIT, IOSQE_ASYNC_BIT, IOSQE_BUFFER_SELECT_BIT, IOSQE_CQE_SKIP_SUCCESS_BIT, }; /* * sqe->flags */ /* use fixed fileset */ #define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT) /* issue after inflight IO */ #define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT) /* links next sqe */ #define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT) /* like LINK, but stronger */ #define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT) /* always go async */ #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) /* select buffer from sqe->buf_group */ #define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT) /* don't post CQE if request succeeded */ #define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT) /* * io_uring_setup() flags */ #define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ #define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ #define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ #define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ /* * Cooperative task running. When requests complete, they often require * forcing the submitter to transition to the kernel to complete. If this * flag is set, work will be done when the task transitions anyway, rather * than force an inter-processor interrupt reschedule. This avoids interrupting * a task running in userspace, and saves an IPI. */ #define IORING_SETUP_COOP_TASKRUN (1U << 8) /* * If COOP_TASKRUN is set, get notified if task work is available for * running and a kernel transition would be needed to run it. This sets * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. */ #define IORING_SETUP_TASKRUN_FLAG (1U << 9) #define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ #define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */ /* * Only one task is allowed to submit requests */ #define IORING_SETUP_SINGLE_ISSUER (1U << 12) /* * Defer running task work to get events. * Rather than running bits of task work whenever the task transitions * try to do it just before it is needed. */ #define IORING_SETUP_DEFER_TASKRUN (1U << 13) /* * Application provides ring memory */ #define IORING_SETUP_NO_MMAP (1U << 14) /* * Register the ring fd in itself for use with * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather * than an fd. */ #define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15) /* * Removes indirection through the SQ index array. 
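 * With this flag the SQ ring tail directly indexes the SQE array, so the
 * array[] indirection normally maintained at submit time is skipped.
 * Illustrative setup via liburing (a sketch):
 *
 *	io_uring_queue_init(256, &ring, IORING_SETUP_NO_SQARRAY);
 *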
*/ #define IORING_SETUP_NO_SQARRAY (1U << 16) enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, IORING_OP_WRITEV, IORING_OP_FSYNC, IORING_OP_READ_FIXED, IORING_OP_WRITE_FIXED, IORING_OP_POLL_ADD, IORING_OP_POLL_REMOVE, IORING_OP_SYNC_FILE_RANGE, IORING_OP_SENDMSG, IORING_OP_RECVMSG, IORING_OP_TIMEOUT, IORING_OP_TIMEOUT_REMOVE, IORING_OP_ACCEPT, IORING_OP_ASYNC_CANCEL, IORING_OP_LINK_TIMEOUT, IORING_OP_CONNECT, IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, IORING_OP_FILES_UPDATE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, IORING_OP_FADVISE, IORING_OP_MADVISE, IORING_OP_SEND, IORING_OP_RECV, IORING_OP_OPENAT2, IORING_OP_EPOLL_CTL, IORING_OP_SPLICE, IORING_OP_PROVIDE_BUFFERS, IORING_OP_REMOVE_BUFFERS, IORING_OP_TEE, IORING_OP_SHUTDOWN, IORING_OP_RENAMEAT, IORING_OP_UNLINKAT, IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT, IORING_OP_MSG_RING, IORING_OP_FSETXATTR, IORING_OP_SETXATTR, IORING_OP_FGETXATTR, IORING_OP_GETXATTR, IORING_OP_SOCKET, IORING_OP_URING_CMD, IORING_OP_SEND_ZC, IORING_OP_SENDMSG_ZC, IORING_OP_READ_MULTISHOT, IORING_OP_WAITID, IORING_OP_FUTEX_WAIT, IORING_OP_FUTEX_WAKE, IORING_OP_FUTEX_WAITV, IORING_OP_FIXED_FD_INSTALL, IORING_OP_FTRUNCATE, /* this goes last, obviously */ IORING_OP_LAST, }; /* * sqe->uring_cmd_flags * IORING_URING_CMD_FIXED use registered buffer; pass this flag * along with setting sqe->buf_index. */ #define IORING_URING_CMD_FIXED (1U << 0) /* * sqe->fsync_flags */ #define IORING_FSYNC_DATASYNC (1U << 0) /* * sqe->timeout_flags */ #define IORING_TIMEOUT_ABS (1U << 0) #define IORING_TIMEOUT_UPDATE (1U << 1) #define IORING_TIMEOUT_BOOTTIME (1U << 2) #define IORING_TIMEOUT_REALTIME (1U << 3) #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_MULTISHOT (1U << 6) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* * sqe->splice_flags * extends splice(2) flags */ #define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */ /* * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the * command flags for POLL_ADD are stored in sqe->len. * * IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if * the poll handler will continue to report * CQEs on behalf of the same SQE. * * IORING_POLL_UPDATE Update existing poll request, matching * sqe->addr as the old user_data field. * * IORING_POLL_LEVEL Level triggered poll. */ #define IORING_POLL_ADD_MULTI (1U << 0) #define IORING_POLL_UPDATE_EVENTS (1U << 1) #define IORING_POLL_UPDATE_USER_DATA (1U << 2) #define IORING_POLL_ADD_LEVEL (1U << 3) /* * ASYNC_CANCEL flags. * * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the * request 'user_data' * IORING_ASYNC_CANCEL_ANY Match any request * IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor */ #define IORING_ASYNC_CANCEL_ALL (1U << 0) #define IORING_ASYNC_CANCEL_FD (1U << 1) #define IORING_ASYNC_CANCEL_ANY (1U << 2) #define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3) /* * send/sendmsg and recv/recvmsg flags (sqe->ioprio) * * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send * or receive and arm poll if that yields an * -EAGAIN result, arm poll upfront and skip * the initial transfer attempt. * * IORING_RECV_MULTISHOT Multishot recv. 
Sets IORING_CQE_F_MORE if * the handler will continue to report * CQEs on behalf of the same SQE. * * IORING_RECVSEND_FIXED_BUF Use registered buffers; the index is stored in * the buf_index field. * * IORING_SEND_ZC_REPORT_USAGE * If set, SEND[MSG]_ZC should report * the zerocopy usage in cqe.res * for the IORING_CQE_F_NOTIF cqe. * 0 is reported if zerocopy was actually possible. * IORING_NOTIF_USAGE_ZC_COPIED if data was copied * (at least partially). */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) #define IORING_SEND_ZC_REPORT_USAGE (1U << 3) /* * cqe.res for IORING_CQE_F_NOTIF if * IORING_SEND_ZC_REPORT_USAGE was requested * * It should be treated as a flag; all other * bits of cqe.res should be treated as reserved! */ #define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31) /* * accept flags stored in sqe->ioprio */ #define IORING_ACCEPT_MULTISHOT (1U << 0) /* * IORING_OP_MSG_RING command types, stored in sqe->addr */ enum { IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */ IORING_MSG_SEND_FD, /* send a registered fd to another ring */ }; /* * IORING_OP_MSG_RING flags (sqe->msg_ring_flags) * * IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. Not * applicable for IORING_MSG_DATA, obviously. */ #define IORING_MSG_RING_CQE_SKIP (1U << 0) /* Pass through the flags from sqe->file_index to cqe->flags */ #define IORING_MSG_RING_FLAGS_PASS (1U << 1) /* * IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags) * * IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC */ #define IORING_FIXED_FD_NO_CLOEXEC (1U << 0) /* * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { __u64 user_data; /* sqe->data submission passed back */ __s32 res; /* result code for this event */ __u32 flags; /* * If the ring is initialized with IORING_SETUP_CQE32, then this field * contains 16 bytes of padding, doubling the size of the CQE. */ __u64 big_cqe[]; }; /* * cqe->flags * * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinguish * them from sends.
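 *
 * For instance, when a request consumed a provided buffer, the buffer ID
 * is recovered from the flags (sketch):
 *
 *	if (cqe->flags & IORING_CQE_F_BUFFER)
 *		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
 *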
*/ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) #define IORING_CQE_F_NOTIF (1U << 3) enum { IORING_CQE_BUFFER_SHIFT = 16, }; /* * Magic offsets for the application to mmap the data it needs */ #define IORING_OFF_SQ_RING 0ULL #define IORING_OFF_CQ_RING 0x8000000ULL #define IORING_OFF_SQES 0x10000000ULL #define IORING_OFF_PBUF_RING 0x80000000ULL #define IORING_OFF_PBUF_SHIFT 16 #define IORING_OFF_MMAP_MASK 0xf8000000ULL /* * Filled with the offset for mmap(2) */ struct io_sqring_offsets { __u32 head; __u32 tail; __u32 ring_mask; __u32 ring_entries; __u32 flags; __u32 dropped; __u32 array; __u32 resv1; __u64 user_addr; }; /* * sq_ring->flags */ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ #define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ #define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */ struct io_cqring_offsets { __u32 head; __u32 tail; __u32 ring_mask; __u32 ring_entries; __u32 overflow; __u32 cqes; __u32 flags; __u32 resv1; __u64 user_addr; }; /* * cq_ring->flags */ /* disable eventfd notifications */ #define IORING_CQ_EVENTFD_DISABLED (1U << 0) /* * io_uring_enter(2) flags */ #define IORING_ENTER_GETEVENTS (1U << 0) #define IORING_ENTER_SQ_WAKEUP (1U << 1) #define IORING_ENTER_SQ_WAIT (1U << 2) #define IORING_ENTER_EXT_ARG (1U << 3) #define IORING_ENTER_REGISTERED_RING (1U << 4) /* * Passed in for io_uring_setup(2). Copied back with updated info on success */ struct io_uring_params { __u32 sq_entries; __u32 cq_entries; __u32 flags; __u32 sq_thread_cpu; __u32 sq_thread_idle; __u32 features; __u32 wq_fd; __u32 resv[3]; struct io_sqring_offsets sq_off; struct io_cqring_offsets cq_off; }; /* * io_uring_params->features flags */ #define IORING_FEAT_SINGLE_MMAP (1U << 0) #define IORING_FEAT_NODROP (1U << 1) #define IORING_FEAT_SUBMIT_STABLE (1U << 2) #define IORING_FEAT_RW_CUR_POS (1U << 3) #define IORING_FEAT_CUR_PERSONALITY (1U << 4) #define IORING_FEAT_FAST_POLL (1U << 5) #define IORING_FEAT_POLL_32BITS (1U << 6) #define IORING_FEAT_SQPOLL_NONFIXED (1U << 7) #define IORING_FEAT_EXT_ARG (1U << 8) #define IORING_FEAT_NATIVE_WORKERS (1U << 9) #define IORING_FEAT_RSRC_TAGS (1U << 10) #define IORING_FEAT_CQE_SKIP (1U << 11) #define IORING_FEAT_LINKED_FILE (1U << 12) #define IORING_FEAT_REG_REG_RING (1U << 13) /* * io_uring_register(2) opcodes and arguments */ enum { IORING_REGISTER_BUFFERS = 0, IORING_UNREGISTER_BUFFERS = 1, IORING_REGISTER_FILES = 2, IORING_UNREGISTER_FILES = 3, IORING_REGISTER_EVENTFD = 4, IORING_UNREGISTER_EVENTFD = 5, IORING_REGISTER_FILES_UPDATE = 6, IORING_REGISTER_EVENTFD_ASYNC = 7, IORING_REGISTER_PROBE = 8, IORING_REGISTER_PERSONALITY = 9, IORING_UNREGISTER_PERSONALITY = 10, IORING_REGISTER_RESTRICTIONS = 11, IORING_REGISTER_ENABLE_RINGS = 12, /* extended with tagging */ IORING_REGISTER_FILES2 = 13, IORING_REGISTER_FILES_UPDATE2 = 14, IORING_REGISTER_BUFFERS2 = 15, IORING_REGISTER_BUFFERS_UPDATE = 16, /* set/clear io-wq thread affinities */ IORING_REGISTER_IOWQ_AFF = 17, IORING_UNREGISTER_IOWQ_AFF = 18, /* set/get max number of io-wq workers */ IORING_REGISTER_IOWQ_MAX_WORKERS = 19, /* register/unregister io_uring fd with the ring */ IORING_REGISTER_RING_FDS = 20, IORING_UNREGISTER_RING_FDS = 21, /* register ring based provide buffer group */ IORING_REGISTER_PBUF_RING = 22, IORING_UNREGISTER_PBUF_RING = 23, /* sync cancelation API */ IORING_REGISTER_SYNC_CANCEL = 24, /* register a range of fixed file slots for automatic slot 
allocation */ IORING_REGISTER_FILE_ALLOC_RANGE = 25, /* return status information for a buffer group */ IORING_REGISTER_PBUF_STATUS = 26, /* set/clear busy poll settings */ IORING_REGISTER_NAPI = 27, IORING_UNREGISTER_NAPI = 28, /* this goes last */ IORING_REGISTER_LAST, /* flag added to the opcode to use a registered ring fd */ IORING_REGISTER_USE_REGISTERED_RING = 1U << 31 }; /* io-wq worker categories */ enum { IO_WQ_BOUND, IO_WQ_UNBOUND, }; /* deprecated, see struct io_uring_rsrc_update */ struct io_uring_files_update { __u32 offset; __u32 resv; __aligned_u64 /* __s32 * */ fds; }; /* * Register a fully sparse file space, rather than pass in an array of all * -1 file descriptors. */ #define IORING_RSRC_REGISTER_SPARSE (1U << 0) struct io_uring_rsrc_register { __u32 nr; __u32 flags; __u64 resv2; __aligned_u64 data; __aligned_u64 tags; }; struct io_uring_rsrc_update { __u32 offset; __u32 resv; __aligned_u64 data; }; struct io_uring_rsrc_update2 { __u32 offset; __u32 resv; __aligned_u64 data; __aligned_u64 tags; __u32 nr; __u32 resv2; }; /* Skip updating fd indexes set to this value in the fd table */ #define IORING_REGISTER_FILES_SKIP (-2) #define IO_URING_OP_SUPPORTED (1U << 0) struct io_uring_probe_op { __u8 op; __u8 resv; __u16 flags; /* IO_URING_OP_* flags */ __u32 resv2; }; struct io_uring_probe { __u8 last_op; /* last opcode supported */ __u8 ops_len; /* length of ops[] array below */ __u16 resv; __u32 resv2[3]; struct io_uring_probe_op ops[]; }; struct io_uring_restriction { __u16 opcode; union { __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */ __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */ __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */ }; __u8 resv; __u32 resv2[3]; }; struct io_uring_buf { __u64 addr; __u32 len; __u16 bid; __u16 resv; }; struct io_uring_buf_ring { union { /* * To avoid spilling into more pages than we need to, the * ring tail is overlaid with the io_uring_buf->resv field. */ struct { __u64 resv1; __u32 resv2; __u16 resv3; __u16 tail; }; struct io_uring_buf bufs[0]; }; }; /* * Flags for IORING_REGISTER_PBUF_RING. * * IOU_PBUF_RING_MMAP: If set, kernel will allocate the memory for the ring. * The application must not set a ring_addr in struct * io_uring_buf_reg, instead it must subsequently call * mmap(2) with the offset set as: * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) * to get a virtual mapping for the ring. 
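 *
 * A minimal sketch of that mmap call ('ring_fd', 'bgid' and 'nentries'
 * stand in for the caller's own values):
 *
 *	off_t off = IORING_OFF_PBUF_RING |
 *		    ((unsigned long long) bgid << IORING_OFF_PBUF_SHIFT);
 *	size_t len = nentries * sizeof(struct io_uring_buf);
 *	struct io_uring_buf_ring *br = mmap(NULL, len,
 *				PROT_READ | PROT_WRITE,
 *				MAP_SHARED | MAP_POPULATE, ring_fd, off);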
*/ enum { IOU_PBUF_RING_MMAP = 1, }; /* argument for IORING_(UN)REGISTER_PBUF_RING */ struct io_uring_buf_reg { __u64 ring_addr; __u32 ring_entries; __u16 bgid; __u16 flags; __u64 resv[3]; }; /* argument for IORING_REGISTER_PBUF_STATUS */ struct io_uring_buf_status { __u32 buf_group; /* input */ __u32 head; /* output */ __u32 resv[8]; }; /* argument for IORING_(UN)REGISTER_NAPI */ struct io_uring_napi { __u32 busy_poll_to; __u8 prefer_busy_poll; __u8 pad[3]; __u64 resv; }; /* * io_uring_restriction->opcode values */ enum { /* Allow an io_uring_register(2) opcode */ IORING_RESTRICTION_REGISTER_OP = 0, /* Allow an sqe opcode */ IORING_RESTRICTION_SQE_OP = 1, /* Allow sqe flags */ IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2, /* Require sqe flags (these flags must be set on each submission) */ IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3, IORING_RESTRICTION_LAST }; struct io_uring_getevents_arg { __u64 sigmask; __u32 sigmask_sz; __u32 pad; __u64 ts; }; /* * Argument for IORING_REGISTER_SYNC_CANCEL */ struct io_uring_sync_cancel_reg { __u64 addr; __s32 fd; __u32 flags; struct __kernel_timespec timeout; __u64 pad[4]; }; /* * Argument for IORING_REGISTER_FILE_ALLOC_RANGE * The range is specified as [off, off + len) */ struct io_uring_file_index_range { __u32 off; __u32 len; __u64 resv; }; struct io_uring_recvmsg_out { __u32 namelen; __u32 controllen; __u32 payloadlen; __u32 flags; }; /* * Argument for IORING_OP_URING_CMD when file is a socket */ enum { SOCKET_URING_OP_SIOCINQ = 0, SOCKET_URING_OP_SIOCOUTQ, SOCKET_URING_OP_GETSOCKOPT, SOCKET_URING_OP_SETSOCKOPT, }; #ifdef __cplusplus } #endif #endif liburing-2.6/src/int_flags.h000066400000000000000000000002671461424365000161140ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_INT_FLAGS #define LIBURING_INT_FLAGS enum { INT_FLAG_REG_RING = 1, INT_FLAG_REG_REG_RING = 2, INT_FLAG_APP_MEM = 4, }; #endif liburing-2.6/src/lib.h000066400000000000000000000024251461424365000147120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_LIB_H #define LIBURING_LIB_H #include #include #include #if defined(__x86_64__) || defined(__i386__) #include "arch/x86/lib.h" #elif defined(__aarch64__) #include "arch/aarch64/lib.h" #elif defined(__riscv) && __riscv_xlen == 64 #include "arch/riscv64/lib.h" #else /* * We don't have nolibc support for this arch. Must use libc! */ #ifdef CONFIG_NOLIBC #error "This arch doesn't support building liburing without libc" #endif /* libc wrappers. 
*/ #include "arch/generic/lib.h" #endif #ifndef offsetof #define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) #endif #ifndef container_of #define container_of(PTR, TYPE, FIELD) ({ \ __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR); \ (TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD)); \ }) #endif #define __maybe_unused __attribute__((__unused__)) #define __hot __attribute__((__hot__)) #define __cold __attribute__((__cold__)) #ifdef CONFIG_NOLIBC void *__uring_memset(void *s, int c, size_t n); void *__uring_malloc(size_t len); void __uring_free(void *p); #define malloc(LEN) __uring_malloc(LEN) #define free(PTR) __uring_free(PTR) #define memset(PTR, C, LEN) __uring_memset(PTR, C, LEN) #endif #endif /* #ifndef LIBURING_LIB_H */ liburing-2.6/src/liburing-ffi.map000066400000000000000000000116571461424365000170560ustar00rootroot00000000000000LIBURING_2.4 { global: io_uring_get_probe; io_uring_get_probe_ring; io_uring_free_probe; io_uring_get_sqe; io_uring_peek_batch_cqe; io_uring_queue_exit; io_uring_queue_init; io_uring_queue_init_params; io_uring_queue_mmap; io_uring_register_buffers; io_uring_register_eventfd; io_uring_register_eventfd_async; io_uring_register_files; io_uring_register_files_update; io_uring_register_personality; io_uring_register_probe; io_uring_ring_dontfork; io_uring_submit; io_uring_submit_and_wait; io_uring_unregister_buffers; io_uring_unregister_eventfd; io_uring_unregister_files; io_uring_unregister_personality; io_uring_wait_cqe_timeout; io_uring_wait_cqes; __io_uring_get_cqe; __io_uring_sqring_wait; io_uring_mlock_size_params; io_uring_mlock_size; io_uring_register_buffers_tags; io_uring_register_buffers_update_tag; io_uring_register_files_tags; io_uring_register_files_update_tag; io_uring_register_iowq_aff; io_uring_unregister_iowq_aff; io_uring_register_iowq_max_workers; io_uring_submit_and_wait_timeout; io_uring_register_ring_fd; io_uring_unregister_ring_fd; io_uring_register_files_sparse; io_uring_register_buffers_sparse; io_uring_register_buf_ring; io_uring_unregister_buf_ring; io_uring_close_ring_fd; io_uring_setup_buf_ring; io_uring_free_buf_ring; io_uring_register_sync_cancel; io_uring_register_file_alloc_range; io_uring_enter; io_uring_enter2; io_uring_setup; io_uring_register; io_uring_get_events; io_uring_submit_and_get_events; io_uring_major_version; io_uring_minor_version; io_uring_check_version; io_uring_peek_cqe; io_uring_prep_timeout_update; io_uring_buf_ring_init; io_uring_prep_mkdirat; io_uring_prep_recv_multishot; io_uring_cq_advance; io_uring_prep_multishot_accept; io_uring_prep_fallocate; io_uring_prep_link_timeout; io_uring_prep_fsync; io_uring_prep_openat_direct; io_uring_prep_multishot_accept_direct; io_uring_opcode_supported; io_uring_prep_madvise; io_uring_prep_send_set_addr; io_uring_recvmsg_payload_length; io_uring_prep_readv2; io_uring_prep_msg_ring; io_uring_prep_rename; io_uring_prep_fadvise; io_uring_prep_send_zc; io_uring_buf_ring_advance; io_uring_cqe_get_data; io_uring_prep_symlinkat; io_uring_prep_writev; io_uring_cq_eventfd_toggle; io_uring_prep_provide_buffers; io_uring_cq_has_overflow; io_uring_prep_cancel_fd; io_uring_prep_socket; io_uring_prep_close_direct; io_uring_recvmsg_name; io_uring_prep_timeout_remove; io_uring_sqring_wait; io_uring_cq_eventfd_enabled; io_uring_prep_remove_buffers; io_uring_prep_tee; io_uring_prep_accept_direct; io_uring_prep_nop; io_uring_prep_getxattr; io_uring_prep_link; io_uring_prep_cancel; io_uring_prep_readv; io_uring_prep_connect; io_uring_cq_ready; io_uring_enable_rings; 
io_uring_prep_shutdown; io_uring_prep_openat; io_uring_sq_space_left; io_uring_recvmsg_payload; io_uring_prep_send; io_uring_buf_ring_add; io_uring_prep_send_zc_fixed; io_uring_prep_epoll_ctl; io_uring_recvmsg_cmsg_firsthdr; io_uring_prep_socket_direct; io_uring_buf_ring_cq_advance; __io_uring_buf_ring_cq_advance; io_uring_prep_mkdir; io_uring_wait_cqe_nr; io_uring_prep_unlink; io_uring_prep_writev2; io_uring_prep_openat2_direct; io_uring_sqe_set_flags; io_uring_sqe_set_data; io_uring_prep_accept; io_uring_prep_poll_update; io_uring_prep_splice; io_uring_prep_poll_multishot; io_uring_prep_symlink; io_uring_sqe_set_data64; io_uring_prep_cancel64; io_uring_prep_fsetxattr; io_uring_prep_recvmsg_multishot; io_uring_cqe_seen; io_uring_prep_sendmsg_zc; io_uring_prep_read; io_uring_prep_statx; io_uring_prep_sendmsg; io_uring_prep_unlinkat; io_uring_prep_setxattr; io_uring_cqe_get_data64; io_uring_prep_renameat; io_uring_prep_poll_remove; io_uring_prep_close; io_uring_sq_ready; io_uring_prep_files_update; io_uring_wait_cqe; io_uring_prep_fgetxattr; io_uring_prep_socket_direct_alloc; io_uring_prep_sync_file_range; io_uring_prep_read_fixed; io_uring_prep_openat2; io_uring_prep_recvmsg; io_uring_recvmsg_cmsg_nexthdr; io_uring_recvmsg_validate; io_uring_prep_rw; io_uring_prep_timeout; io_uring_prep_linkat; io_uring_prep_write_fixed; io_uring_prep_poll_add; io_uring_buf_ring_mask; io_uring_register_restrictions; io_uring_prep_write; io_uring_prep_recv; io_uring_prep_msg_ring_cqe_flags; io_uring_prep_msg_ring_fd; io_uring_prep_msg_ring_fd_alloc; io_uring_prep_sendto; io_uring_register_napi; io_uring_unregister_napi; local: *; }; LIBURING_2.5 { global: io_uring_queue_init_mem; io_uring_prep_cmd_sock; io_uring_prep_read_multishot; io_uring_prep_waitid; io_uring_prep_futex_wake; io_uring_prep_futex_wait; io_uring_prep_futex_waitv; } LIBURING_2.4; LIBURING_2.6 { global: io_uring_prep_fixed_fd_install; io_uring_buf_ring_available; io_uring_prep_ftruncate; } LIBURING_2.5; liburing-2.6/src/liburing.map000066400000000000000000000040261461424365000163040ustar00rootroot00000000000000LIBURING_2.0 { global: io_uring_get_probe; io_uring_get_probe_ring; io_uring_free_probe; io_uring_get_sqe; io_uring_peek_batch_cqe; io_uring_queue_exit; io_uring_queue_init; io_uring_queue_init_params; io_uring_queue_mmap; io_uring_register_buffers; io_uring_register_eventfd; io_uring_register_eventfd_async; io_uring_register_files; io_uring_register_files_update; io_uring_register_personality; io_uring_register_probe; io_uring_ring_dontfork; io_uring_submit; io_uring_submit_and_wait; io_uring_unregister_buffers; io_uring_unregister_eventfd; io_uring_unregister_files; io_uring_unregister_personality; io_uring_wait_cqe_timeout; io_uring_wait_cqes; __io_uring_get_cqe; __io_uring_sqring_wait; local: *; }; LIBURING_2.1 { global: io_uring_mlock_size_params; io_uring_mlock_size; io_uring_register_buffers_tags; io_uring_register_buffers_update_tag; io_uring_register_files_tags; io_uring_register_files_update_tag; io_uring_register_iowq_aff; io_uring_unregister_iowq_aff; io_uring_register_iowq_max_workers; } LIBURING_2.0; LIBURING_2.2 { global: io_uring_submit_and_wait_timeout; io_uring_register_ring_fd; io_uring_unregister_ring_fd; io_uring_register_files_sparse; io_uring_register_buffers_sparse; io_uring_register_buf_ring; io_uring_unregister_buf_ring; } LIBURING_2.1; LIBURING_2.3 { global: io_uring_register_sync_cancel; io_uring_register_file_alloc_range; io_uring_enter; io_uring_enter2; io_uring_setup; io_uring_register; 
io_uring_get_events; io_uring_submit_and_get_events; } LIBURING_2.2; LIBURING_2.4 { global: io_uring_major_version; io_uring_minor_version; io_uring_check_version; io_uring_close_ring_fd; io_uring_enable_rings; io_uring_register_restrictions; io_uring_setup_buf_ring; io_uring_free_buf_ring; } LIBURING_2.3; LIBURING_2.5 { global: io_uring_queue_init_mem; } LIBURING_2.4; LIBURING_2.6 { global: io_uring_buf_ring_head; io_uring_register_napi; io_uring_unregister_napi; } LIBURING_2.5; liburing-2.6/src/nolibc.c000066400000000000000000000017331461424365000154060ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef CONFIG_NOLIBC #error "This file should only be compiled for no libc build" #endif #include "lib.h" #include "syscall.h" void *__uring_memset(void *s, int c, size_t n) { size_t i; unsigned char *p = s; for (i = 0; i < n; i++) { p[i] = (unsigned char) c; /* * An empty inline ASM to avoid auto-vectorization * because it's too bloated for liburing. */ __asm__ volatile (""); } return s; } struct uring_heap { size_t len; char user_p[] __attribute__((__aligned__)); }; void *__uring_malloc(size_t len) { struct uring_heap *heap; heap = __sys_mmap(NULL, sizeof(*heap) + len, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (IS_ERR(heap)) return NULL; heap->len = sizeof(*heap) + len; return heap->user_p; } void __uring_free(void *p) { struct uring_heap *heap; if (uring_unlikely(!p)) return; heap = container_of(p, struct uring_heap, user_p); __sys_munmap(heap, heap->len); } liburing-2.6/src/queue.c000066400000000000000000000251521461424365000152650ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #define _POSIX_C_SOURCE 200112L #include "lib.h" #include "syscall.h" #include "liburing.h" #include "int_flags.h" #include "liburing/compat.h" #include "liburing/io_uring.h" /* * Returns true if we're not using SQ thread (thus nobody submits but us) * or if IORING_SQ_NEED_WAKEUP is set, so submit thread must be explicitly * awakened. For the latter case, we set the thread wakeup flag. * If no SQEs are ready for submission, returns false. */ static inline bool sq_ring_needs_enter(struct io_uring *ring, unsigned submit, unsigned *flags) { if (!submit) return false; if (!(ring->flags & IORING_SETUP_SQPOLL)) return true; /* * Ensure the kernel can see the store to the SQ tail before we read * the flags. */ io_uring_smp_mb(); if (uring_unlikely(IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_NEED_WAKEUP)) { *flags |= IORING_ENTER_SQ_WAKEUP; return true; } return false; } static inline bool cq_ring_needs_flush(struct io_uring *ring) { return IO_URING_READ_ONCE(*ring->sq.kflags) & (IORING_SQ_CQ_OVERFLOW | IORING_SQ_TASKRUN); } static inline bool cq_ring_needs_enter(struct io_uring *ring) { return (ring->flags & IORING_SETUP_IOPOLL) || cq_ring_needs_flush(ring); } struct get_data { unsigned submit; unsigned wait_nr; unsigned get_flags; int sz; int has_ts; void *arg; }; static int _io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, struct get_data *data) { struct io_uring_cqe *cqe = NULL; bool looped = false; int err = 0; do { bool need_enter = false; unsigned flags = 0; unsigned nr_available; int ret; ret = __io_uring_peek_cqe(ring, &cqe, &nr_available); if (ret) { if (!err) err = ret; break; } if (!cqe && !data->wait_nr && !data->submit) { /* * If we already looped once, we already entered * the kernel. Since there's nothing to submit or * wait for, don't keep retrying. 
*/ if (looped || !cq_ring_needs_enter(ring)) { if (!err) err = -EAGAIN; break; } need_enter = true; } if (data->wait_nr > nr_available || need_enter) { flags = IORING_ENTER_GETEVENTS | data->get_flags; need_enter = true; } if (sq_ring_needs_enter(ring, data->submit, &flags)) need_enter = true; if (!need_enter) break; if (looped && data->has_ts) { struct io_uring_getevents_arg *arg = data->arg; if (!cqe && arg->ts && !err) err = -ETIME; break; } if (ring->int_flags & INT_FLAG_REG_RING) flags |= IORING_ENTER_REGISTERED_RING; ret = __sys_io_uring_enter2(ring->enter_ring_fd, data->submit, data->wait_nr, flags, data->arg, data->sz); if (ret < 0) { if (!err) err = ret; break; } data->submit -= ret; if (cqe) break; if (!looped) { looped = true; err = ret; } } while (1); *cqe_ptr = cqe; return err; } int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned submit, unsigned wait_nr, sigset_t *sigmask) { struct get_data data = { .submit = submit, .wait_nr = wait_nr, .get_flags = 0, .sz = _NSIG / 8, .arg = sigmask, }; return _io_uring_get_cqe(ring, cqe_ptr, &data); } int io_uring_get_events(struct io_uring *ring) { int flags = IORING_ENTER_GETEVENTS; if (ring->int_flags & INT_FLAG_REG_RING) flags |= IORING_ENTER_REGISTERED_RING; return __sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL); } /* * Fill in an array of IO completions up to count, if any are available. * Returns the amount of IO completions filled. */ unsigned io_uring_peek_batch_cqe(struct io_uring *ring, struct io_uring_cqe **cqes, unsigned count) { unsigned ready; bool overflow_checked = false; int shift = 0; if (ring->flags & IORING_SETUP_CQE32) shift = 1; again: ready = io_uring_cq_ready(ring); if (ready) { unsigned head = *ring->cq.khead; unsigned mask = ring->cq.ring_mask; unsigned last; int i = 0; count = count > ready ? ready : count; last = head + count; for (;head != last; head++, i++) cqes[i] = &ring->cq.cqes[(head & mask) << shift]; return count; } if (overflow_checked) return 0; if (cq_ring_needs_flush(ring)) { io_uring_get_events(ring); overflow_checked = true; goto again; } return 0; } /* * Sync internal state with kernel ring state on the SQ side. Returns the * number of pending items in the SQ ring, for the shared ring. */ static unsigned __io_uring_flush_sq(struct io_uring *ring) { struct io_uring_sq *sq = &ring->sq; unsigned tail = sq->sqe_tail; if (sq->sqe_head != tail) { sq->sqe_head = tail; /* * Ensure kernel sees the SQE updates before the tail update. */ if (!(ring->flags & IORING_SETUP_SQPOLL)) *sq->ktail = tail; else io_uring_smp_store_release(sq->ktail, tail); } /* * This load needs to be atomic, since sq->khead is written concurrently * by the kernel, but it doesn't need to be load_acquire, since the * kernel doesn't store to the submission queue; it advances khead just * to indicate that it's finished reading the submission queue entries * so they're available for us to write to. */ return tail - IO_URING_READ_ONCE(*sq->khead); } /* * If we have kernel support for IORING_ENTER_EXT_ARG, then we can use that * more efficiently than queueing an internal timeout command. 
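 *
 * Sketch of the difference (illustrative only): instead of the library
 * posting a private IORING_OP_TIMEOUT SQE, the timeout and signal mask
 * ride along with the enter syscall itself:
 *
 *	struct io_uring_getevents_arg arg = {
 *		.sigmask	= (unsigned long) sigmask,
 *		.sigmask_sz	= _NSIG / 8,
 *		.ts		= (unsigned long) ts,
 *	};
 *	io_uring_enter2(fd, to_submit, wait_nr, IORING_ENTER_GETEVENTS |
 *			IORING_ENTER_EXT_ARG, (sigset_t *) &arg, sizeof(arg));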
*/ static int io_uring_wait_cqes_new(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned wait_nr, struct __kernel_timespec *ts, sigset_t *sigmask) { struct io_uring_getevents_arg arg = { .sigmask = (unsigned long) sigmask, .sigmask_sz = _NSIG / 8, .ts = (unsigned long) ts }; struct get_data data = { .wait_nr = wait_nr, .get_flags = IORING_ENTER_EXT_ARG, .sz = sizeof(arg), .has_ts = ts != NULL, .arg = &arg }; return _io_uring_get_cqe(ring, cqe_ptr, &data); } /* * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note * that an sqe is used internally to handle the timeout. For kernels that don't * support IORING_FEAT_EXT_ARG, applications using this function must never * set sqe->user_data to LIBURING_UDATA_TIMEOUT! * * For kernels without IORING_FEAT_EXT_ARG (5.10 and older), if 'ts' is * specified, the application need not call io_uring_submit() before * calling this function, as we will do that on its behalf. From this it also * follows that this function isn't safe to use for applications that split SQ * and CQ handling between two threads and expect that to work without * synchronization, as this function manipulates both the SQ and CQ side. * * For kernels with IORING_FEAT_EXT_ARG, no implicit submission is done and * hence this function is safe to use for applications that split SQ and CQ * handling between two threads. */ static int __io_uring_submit_timeout(struct io_uring *ring, unsigned wait_nr, struct __kernel_timespec *ts) { struct io_uring_sqe *sqe; int ret; /* * If the SQ ring is full, we may need to submit IO first */ sqe = io_uring_get_sqe(ring); if (!sqe) { ret = io_uring_submit(ring); if (ret < 0) return ret; sqe = io_uring_get_sqe(ring); if (!sqe) return -EAGAIN; } io_uring_prep_timeout(sqe, ts, wait_nr, 0); sqe->user_data = LIBURING_UDATA_TIMEOUT; return __io_uring_flush_sq(ring); } int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned wait_nr, struct __kernel_timespec *ts, sigset_t *sigmask) { int to_submit = 0; if (ts) { if (ring->features & IORING_FEAT_EXT_ARG) return io_uring_wait_cqes_new(ring, cqe_ptr, wait_nr, ts, sigmask); to_submit = __io_uring_submit_timeout(ring, wait_nr, ts); if (to_submit < 0) return to_submit; } return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask); } int io_uring_submit_and_wait_timeout(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, unsigned wait_nr, struct __kernel_timespec *ts, sigset_t *sigmask) { int to_submit; if (ts) { if (ring->features & IORING_FEAT_EXT_ARG) { struct io_uring_getevents_arg arg = { .sigmask = (unsigned long) sigmask, .sigmask_sz = _NSIG / 8, .ts = (unsigned long) ts }; struct get_data data = { .submit = __io_uring_flush_sq(ring), .wait_nr = wait_nr, .get_flags = IORING_ENTER_EXT_ARG, .sz = sizeof(arg), .has_ts = ts != NULL, .arg = &arg }; return _io_uring_get_cqe(ring, cqe_ptr, &data); } to_submit = __io_uring_submit_timeout(ring, wait_nr, ts); if (to_submit < 0) return to_submit; } else to_submit = __io_uring_flush_sq(ring); return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask); } /* * See io_uring_wait_cqes() - this function is the same, it just always uses * '1' as the wait_nr. */ int io_uring_wait_cqe_timeout(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, struct __kernel_timespec *ts) { return io_uring_wait_cqes(ring, cqe_ptr, 1, ts, NULL); } /* * Submit sqes acquired from io_uring_get_sqe() to the kernel. 
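 *
 * Typical caller-side flow that feeds this path (sketch, error handling
 * elided):
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_nop(sqe);
 *	io_uring_sqe_set_data64(sqe, 42);
 *	io_uring_submit(&ring);	// flushes the SQ ring, then enters the kernel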
* * Returns number of sqes submitted */ static int __io_uring_submit(struct io_uring *ring, unsigned submitted, unsigned wait_nr, bool getevents) { bool cq_needs_enter = getevents || wait_nr || cq_ring_needs_enter(ring); unsigned flags; int ret; flags = 0; if (sq_ring_needs_enter(ring, submitted, &flags) || cq_needs_enter) { if (cq_needs_enter) flags |= IORING_ENTER_GETEVENTS; if (ring->int_flags & INT_FLAG_REG_RING) flags |= IORING_ENTER_REGISTERED_RING; ret = __sys_io_uring_enter(ring->enter_ring_fd, submitted, wait_nr, flags, NULL); } else ret = submitted; return ret; } static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr) { return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr, false); } /* * Submit sqes acquired from io_uring_get_sqe() to the kernel. * * Returns number of sqes submitted */ int io_uring_submit(struct io_uring *ring) { return __io_uring_submit_and_wait(ring, 0); } /* * Like io_uring_submit(), but allows waiting for events as well. * * Returns number of sqes submitted */ int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr) { return __io_uring_submit_and_wait(ring, wait_nr); } int io_uring_submit_and_get_events(struct io_uring *ring) { return __io_uring_submit(ring, __io_uring_flush_sq(ring), 0, true); } #ifdef LIBURING_INTERNAL struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) { return _io_uring_get_sqe(ring); } #endif int __io_uring_sqring_wait(struct io_uring *ring) { int flags = IORING_ENTER_SQ_WAIT; if (ring->int_flags & INT_FLAG_REG_RING) flags |= IORING_ENTER_REGISTERED_RING; return __sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL); } liburing-2.6/src/register.c000066400000000000000000000207721461424365000157700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #define _POSIX_C_SOURCE 200112L #include "lib.h" #include "syscall.h" #include "liburing.h" #include "int_flags.h" #include "liburing/compat.h" #include "liburing/io_uring.h" static inline int do_register(struct io_uring *ring, unsigned int opcode, const void *arg, unsigned int nr_args) { int fd; if (ring->int_flags & INT_FLAG_REG_REG_RING) { opcode |= IORING_REGISTER_USE_REGISTERED_RING; fd = ring->enter_ring_fd; } else { fd = ring->ring_fd; } return __sys_io_uring_register(fd, opcode, arg, nr_args); } int io_uring_register_buffers_update_tag(struct io_uring *ring, unsigned off, const struct iovec *iovecs, const __u64 *tags, unsigned nr) { struct io_uring_rsrc_update2 up = { .offset = off, .data = (unsigned long)iovecs, .tags = (unsigned long)tags, .nr = nr, }; return do_register(ring, IORING_REGISTER_BUFFERS_UPDATE, &up, sizeof(up)); } int io_uring_register_buffers_tags(struct io_uring *ring, const struct iovec *iovecs, const __u64 *tags, unsigned nr) { struct io_uring_rsrc_register reg = { .nr = nr, .data = (unsigned long)iovecs, .tags = (unsigned long)tags, }; return do_register(ring, IORING_REGISTER_BUFFERS2, ®, sizeof(reg)); } int io_uring_register_buffers_sparse(struct io_uring *ring, unsigned nr) { struct io_uring_rsrc_register reg = { .flags = IORING_RSRC_REGISTER_SPARSE, .nr = nr, }; return do_register(ring, IORING_REGISTER_BUFFERS2, ®, sizeof(reg)); } int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs, unsigned nr_iovecs) { return do_register(ring, IORING_REGISTER_BUFFERS, iovecs, nr_iovecs); } int io_uring_unregister_buffers(struct io_uring *ring) { return do_register(ring, IORING_UNREGISTER_BUFFERS, NULL, 0); } int io_uring_register_files_update_tag(struct io_uring *ring, 
unsigned off, const int *files, const __u64 *tags, unsigned nr_files) { struct io_uring_rsrc_update2 up = { .offset = off, .data = (unsigned long)files, .tags = (unsigned long)tags, .nr = nr_files, }; return do_register(ring, IORING_REGISTER_FILES_UPDATE2, &up, sizeof(up)); } /* * Register an update for an existing file set. The updates will start at * 'off' in the original array, and 'nr_files' is the number of files we'll * update. * * Returns number of files updated on success, -ERROR on failure. */ int io_uring_register_files_update(struct io_uring *ring, unsigned off, const int *files, unsigned nr_files) { struct io_uring_files_update up = { .offset = off, .fds = (unsigned long) files, }; return do_register(ring, IORING_REGISTER_FILES_UPDATE, &up, nr_files); } static int increase_rlimit_nofile(unsigned nr) { int ret; struct rlimit rlim; ret = __sys_getrlimit(RLIMIT_NOFILE, &rlim); if (ret < 0) return ret; if (rlim.rlim_cur < nr) { rlim.rlim_cur += nr; __sys_setrlimit(RLIMIT_NOFILE, &rlim); } return 0; } int io_uring_register_files_sparse(struct io_uring *ring, unsigned nr) { struct io_uring_rsrc_register reg = { .flags = IORING_RSRC_REGISTER_SPARSE, .nr = nr, }; int ret, did_increase = 0; do { ret = do_register(ring, IORING_REGISTER_FILES2, ®, sizeof(reg)); if (ret >= 0) break; if (ret == -EMFILE && !did_increase) { did_increase = 1; increase_rlimit_nofile(nr); continue; } break; } while (1); return ret; } int io_uring_register_files_tags(struct io_uring *ring, const int *files, const __u64 *tags, unsigned nr) { struct io_uring_rsrc_register reg = { .nr = nr, .data = (unsigned long)files, .tags = (unsigned long)tags, }; int ret, did_increase = 0; do { ret = do_register(ring, IORING_REGISTER_FILES2, ®, sizeof(reg)); if (ret >= 0) break; if (ret == -EMFILE && !did_increase) { did_increase = 1; increase_rlimit_nofile(nr); continue; } break; } while (1); return ret; } int io_uring_register_files(struct io_uring *ring, const int *files, unsigned nr_files) { int ret, did_increase = 0; do { ret = do_register(ring, IORING_REGISTER_FILES, files, nr_files); if (ret >= 0) break; if (ret == -EMFILE && !did_increase) { did_increase = 1; increase_rlimit_nofile(nr_files); continue; } break; } while (1); return ret; } int io_uring_unregister_files(struct io_uring *ring) { return do_register(ring, IORING_UNREGISTER_FILES, NULL, 0); } int io_uring_register_eventfd(struct io_uring *ring, int event_fd) { return do_register(ring, IORING_REGISTER_EVENTFD, &event_fd, 1); } int io_uring_unregister_eventfd(struct io_uring *ring) { return do_register(ring, IORING_UNREGISTER_EVENTFD, NULL, 0); } int io_uring_register_eventfd_async(struct io_uring *ring, int event_fd) { return do_register(ring, IORING_REGISTER_EVENTFD_ASYNC, &event_fd, 1); } int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p, unsigned int nr_ops) { return do_register(ring, IORING_REGISTER_PROBE, p, nr_ops); } int io_uring_register_personality(struct io_uring *ring) { return do_register(ring, IORING_REGISTER_PERSONALITY, NULL, 0); } int io_uring_unregister_personality(struct io_uring *ring, int id) { return do_register(ring, IORING_UNREGISTER_PERSONALITY, NULL, id); } int io_uring_register_restrictions(struct io_uring *ring, struct io_uring_restriction *res, unsigned int nr_res) { return do_register(ring, IORING_REGISTER_RESTRICTIONS, res, nr_res); } int io_uring_enable_rings(struct io_uring *ring) { return do_register(ring, IORING_REGISTER_ENABLE_RINGS, NULL, 0); } int io_uring_register_iowq_aff(struct io_uring *ring, 
size_t cpusz, const cpu_set_t *mask) { if (cpusz >= (1U << 31)) return -EINVAL; return do_register(ring, IORING_REGISTER_IOWQ_AFF, mask, (int) cpusz); } int io_uring_unregister_iowq_aff(struct io_uring *ring) { return do_register(ring, IORING_UNREGISTER_IOWQ_AFF, NULL, 0); } int io_uring_register_iowq_max_workers(struct io_uring *ring, unsigned int *val) { return do_register(ring, IORING_REGISTER_IOWQ_MAX_WORKERS, val, 2); } int io_uring_register_ring_fd(struct io_uring *ring) { struct io_uring_rsrc_update up = { .data = ring->ring_fd, .offset = -1U, }; int ret; if (ring->int_flags & INT_FLAG_REG_RING) return -EEXIST; ret = do_register(ring, IORING_REGISTER_RING_FDS, &up, 1); if (ret == 1) { ring->enter_ring_fd = up.offset; ring->int_flags |= INT_FLAG_REG_RING; if (ring->features & IORING_FEAT_REG_REG_RING) { ring->int_flags |= INT_FLAG_REG_REG_RING; } } return ret; } int io_uring_unregister_ring_fd(struct io_uring *ring) { struct io_uring_rsrc_update up = { .offset = ring->enter_ring_fd, }; int ret; if (!(ring->int_flags & INT_FLAG_REG_RING)) return -EINVAL; ret = do_register(ring, IORING_UNREGISTER_RING_FDS, &up, 1); if (ret == 1) { ring->enter_ring_fd = ring->ring_fd; ring->int_flags &= ~(INT_FLAG_REG_RING | INT_FLAG_REG_REG_RING); } return ret; } int io_uring_close_ring_fd(struct io_uring *ring) { if (!(ring->features & IORING_FEAT_REG_REG_RING)) return -EOPNOTSUPP; if (!(ring->int_flags & INT_FLAG_REG_RING)) return -EINVAL; if (ring->ring_fd == -1) return -EBADF; __sys_close(ring->ring_fd); ring->ring_fd = -1; return 1; } int io_uring_register_buf_ring(struct io_uring *ring, struct io_uring_buf_reg *reg, unsigned int __maybe_unused flags) { return do_register(ring, IORING_REGISTER_PBUF_RING, reg, 1); } int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid) { struct io_uring_buf_reg reg = { .bgid = bgid }; return do_register(ring, IORING_UNREGISTER_PBUF_RING, ®, 1); } int io_uring_buf_ring_head(struct io_uring *ring, int buf_group, uint16_t *head) { struct io_uring_buf_status buf_status = { .buf_group = buf_group, }; int ret; ret = do_register(ring, IORING_REGISTER_PBUF_STATUS, &buf_status, 1); if (ret) return ret; *head = buf_status.head; return 0; } int io_uring_register_sync_cancel(struct io_uring *ring, struct io_uring_sync_cancel_reg *reg) { return do_register(ring, IORING_REGISTER_SYNC_CANCEL, reg, 1); } int io_uring_register_file_alloc_range(struct io_uring *ring, unsigned off, unsigned len) { struct io_uring_file_index_range range = { .off = off, .len = len }; return do_register(ring, IORING_REGISTER_FILE_ALLOC_RANGE, &range, 0); } int io_uring_register_napi(struct io_uring *ring, struct io_uring_napi *napi) { return do_register(ring, IORING_REGISTER_NAPI, napi, 1); } int io_uring_unregister_napi(struct io_uring *ring, struct io_uring_napi *napi) { return do_register(ring, IORING_UNREGISTER_NAPI, napi, 1); } liburing-2.6/src/setup.c000066400000000000000000000426341461424365000153050ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #define _DEFAULT_SOURCE #include "lib.h" #include "syscall.h" #include "liburing.h" #include "int_flags.h" #include "setup.h" #include "liburing/compat.h" #include "liburing/io_uring.h" #define KERN_MAX_ENTRIES 32768 #define KERN_MAX_CQ_ENTRIES (2 * KERN_MAX_ENTRIES) static inline int __fls(int x) { if (!x) return 0; return 8 * sizeof(x) - __builtin_clz(x); } static unsigned roundup_pow2(unsigned depth) { return 1U << __fls(depth - 1); } static int get_sq_cq_entries(unsigned entries, struct io_uring_params *p, unsigned *sq, 
unsigned *cq) { unsigned cq_entries; if (!entries) return -EINVAL; if (entries > KERN_MAX_ENTRIES) { if (!(p->flags & IORING_SETUP_CLAMP)) return -EINVAL; entries = KERN_MAX_ENTRIES; } entries = roundup_pow2(entries); if (p->flags & IORING_SETUP_CQSIZE) { if (!p->cq_entries) return -EINVAL; cq_entries = p->cq_entries; if (cq_entries > KERN_MAX_CQ_ENTRIES) { if (!(p->flags & IORING_SETUP_CLAMP)) return -EINVAL; cq_entries = KERN_MAX_CQ_ENTRIES; } cq_entries = roundup_pow2(cq_entries); if (cq_entries < entries) return -EINVAL; } else { cq_entries = 2 * entries; } *sq = entries; *cq = cq_entries; return 0; } static void io_uring_unmap_rings(struct io_uring_sq *sq, struct io_uring_cq *cq) { if (sq->ring_sz) __sys_munmap(sq->ring_ptr, sq->ring_sz); if (cq->ring_ptr && cq->ring_sz && cq->ring_ptr != sq->ring_ptr) __sys_munmap(cq->ring_ptr, cq->ring_sz); } static void io_uring_setup_ring_pointers(struct io_uring_params *p, struct io_uring_sq *sq, struct io_uring_cq *cq) { sq->khead = sq->ring_ptr + p->sq_off.head; sq->ktail = sq->ring_ptr + p->sq_off.tail; sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask; sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries; sq->kflags = sq->ring_ptr + p->sq_off.flags; sq->kdropped = sq->ring_ptr + p->sq_off.dropped; if (!(p->flags & IORING_SETUP_NO_SQARRAY)) sq->array = sq->ring_ptr + p->sq_off.array; cq->khead = cq->ring_ptr + p->cq_off.head; cq->ktail = cq->ring_ptr + p->cq_off.tail; cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask; cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries; cq->koverflow = cq->ring_ptr + p->cq_off.overflow; cq->cqes = cq->ring_ptr + p->cq_off.cqes; if (p->cq_off.flags) cq->kflags = cq->ring_ptr + p->cq_off.flags; sq->ring_mask = *sq->kring_mask; sq->ring_entries = *sq->kring_entries; cq->ring_mask = *cq->kring_mask; cq->ring_entries = *cq->kring_entries; } static int io_uring_mmap(int fd, struct io_uring_params *p, struct io_uring_sq *sq, struct io_uring_cq *cq) { size_t size; int ret; size = sizeof(struct io_uring_cqe); if (p->flags & IORING_SETUP_CQE32) size += sizeof(struct io_uring_cqe); sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned); cq->ring_sz = p->cq_off.cqes + p->cq_entries * size; if (p->features & IORING_FEAT_SINGLE_MMAP) { if (cq->ring_sz > sq->ring_sz) sq->ring_sz = cq->ring_sz; cq->ring_sz = sq->ring_sz; } sq->ring_ptr = __sys_mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); if (IS_ERR(sq->ring_ptr)) return PTR_ERR(sq->ring_ptr); if (p->features & IORING_FEAT_SINGLE_MMAP) { cq->ring_ptr = sq->ring_ptr; } else { cq->ring_ptr = __sys_mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); if (IS_ERR(cq->ring_ptr)) { ret = PTR_ERR(cq->ring_ptr); cq->ring_ptr = NULL; goto err; } } size = sizeof(struct io_uring_sqe); if (p->flags & IORING_SETUP_SQE128) size += 64; sq->sqes = __sys_mmap(0, size * p->sq_entries, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); if (IS_ERR(sq->sqes)) { ret = PTR_ERR(sq->sqes); err: io_uring_unmap_rings(sq, cq); return ret; } io_uring_setup_ring_pointers(p, sq, cq); return 0; } /* * For users that want to specify sq_thread_cpu or sq_thread_idle, this * interface is a convenient helper for mmap()ing the rings. * Returns -errno on error, or zero on success. On success, 'ring' * contains the necessary information to read/write to the rings. 
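 *
 * Sketch of the raw-syscall setup this helper pairs with (illustrative
 * only, error handling elided):
 *
 *	struct io_uring_params p = { };
 *	struct io_uring ring;
 *	int fd = io_uring_setup(8, &p);
 *	int ret = io_uring_queue_mmap(fd, &p, &ring);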
*/ __cold int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring) { memset(ring, 0, sizeof(*ring)); return io_uring_mmap(fd, p, &ring->sq, &ring->cq); } /* * Ensure that the mmap'ed rings aren't available to a child after a fork(2). * This uses madvise(..., MADV_DONTFORK) on the mmap'ed ranges. */ __cold int io_uring_ring_dontfork(struct io_uring *ring) { size_t len; int ret; if (!ring->sq.ring_ptr || !ring->sq.sqes || !ring->cq.ring_ptr) return -EINVAL; len = sizeof(struct io_uring_sqe); if (ring->flags & IORING_SETUP_SQE128) len += 64; len *= ring->sq.ring_entries; ret = __sys_madvise(ring->sq.sqes, len, MADV_DONTFORK); if (ret < 0) return ret; len = ring->sq.ring_sz; ret = __sys_madvise(ring->sq.ring_ptr, len, MADV_DONTFORK); if (ret < 0) return ret; if (ring->cq.ring_ptr != ring->sq.ring_ptr) { len = ring->cq.ring_sz; ret = __sys_madvise(ring->cq.ring_ptr, len, MADV_DONTFORK); if (ret < 0) return ret; } return 0; } /* FIXME */ static size_t huge_page_size = 2 * 1024 * 1024; /* * Returns negative for error, or number of bytes used in the buffer on success */ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p, struct io_uring_sq *sq, struct io_uring_cq *cq, void *buf, size_t buf_size) { unsigned long page_size = get_page_size(); unsigned sq_entries, cq_entries; size_t ring_mem, sqes_mem; unsigned long mem_used = 0; void *ptr; int ret; ret = get_sq_cq_entries(entries, p, &sq_entries, &cq_entries); if (ret) return ret; sqes_mem = sq_entries * sizeof(struct io_uring_sqe); sqes_mem = (sqes_mem + page_size - 1) & ~(page_size - 1); ring_mem = cq_entries * sizeof(struct io_uring_cqe); if (p->flags & IORING_SETUP_CQE32) ring_mem *= 2; if (!(p->flags & IORING_SETUP_NO_SQARRAY)) ring_mem += sq_entries * sizeof(unsigned); mem_used = sqes_mem + ring_mem; mem_used = (mem_used + page_size - 1) & ~(page_size - 1); /* * A maxed-out number of CQ entries with IORING_SETUP_CQE32 fills a 2MB * huge page by itself, so the SQ entries won't fit in the same huge * page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES, * but check that too to future-proof (e.g. against different huge page * sizes). Bail out early so we don't overrun. 
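 *
 * Worked example: KERN_MAX_CQ_ENTRIES is 2 * 32768 = 65536, and with
 * IORING_SETUP_CQE32 each CQE grows to 32 bytes, so the CQ ring alone
 * needs 65536 * 32 = 2097152 bytes - a full 2MB huge page, with no
 * room left over for the SQE array.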
*/ if (!buf && (sqes_mem > huge_page_size || ring_mem > huge_page_size)) return -ENOMEM; if (buf) { if (mem_used > buf_size) return -ENOMEM; ptr = buf; } else { int map_hugetlb = 0; if (sqes_mem <= page_size) buf_size = page_size; else { buf_size = huge_page_size; map_hugetlb = MAP_HUGETLB; } ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS|map_hugetlb, -1, 0); if (IS_ERR(ptr)) return PTR_ERR(ptr); } sq->sqes = ptr; if (mem_used <= buf_size) { sq->ring_ptr = (void *) sq->sqes + sqes_mem; /* clear ring sizes, we have just one mmap() to undo */ cq->ring_sz = 0; sq->ring_sz = 0; } else { int map_hugetlb = 0; if (ring_mem <= page_size) buf_size = page_size; else { buf_size = huge_page_size; map_hugetlb = MAP_HUGETLB; } ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS|map_hugetlb, -1, 0); if (IS_ERR(ptr)) { __sys_munmap(sq->sqes, 1); return PTR_ERR(ptr); } sq->ring_ptr = ptr; sq->ring_sz = buf_size; cq->ring_sz = 0; } cq->ring_ptr = (void *) sq->ring_ptr; p->sq_off.user_addr = (unsigned long) sq->sqes; p->cq_off.user_addr = (unsigned long) sq->ring_ptr; return (int) mem_used; } int __io_uring_queue_init_params(unsigned entries, struct io_uring *ring, struct io_uring_params *p, void *buf, size_t buf_size) { int fd, ret = 0; unsigned *sq_array; unsigned sq_entries, index; memset(ring, 0, sizeof(*ring)); /* * The kernel does this check already, but checking it here allows us * to avoid handling it below. */ if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY && !(p->flags & IORING_SETUP_NO_MMAP)) return -EINVAL; if (p->flags & IORING_SETUP_NO_MMAP) { ret = io_uring_alloc_huge(entries, p, &ring->sq, &ring->cq, buf, buf_size); if (ret < 0) return ret; if (buf) ring->int_flags |= INT_FLAG_APP_MEM; } fd = __sys_io_uring_setup(entries, p); if (fd < 0) { if ((p->flags & IORING_SETUP_NO_MMAP) && !(ring->int_flags & INT_FLAG_APP_MEM)) { __sys_munmap(ring->sq.sqes, 1); io_uring_unmap_rings(&ring->sq, &ring->cq); } return fd; } if (!(p->flags & IORING_SETUP_NO_MMAP)) { ret = io_uring_queue_mmap(fd, p, ring); if (ret) { __sys_close(fd); return ret; } } else { io_uring_setup_ring_pointers(p, &ring->sq, &ring->cq); } /* * Directly map SQ slots to SQEs */ sq_entries = ring->sq.ring_entries; if (!(p->flags & IORING_SETUP_NO_SQARRAY)) { sq_array = ring->sq.array; for (index = 0; index < sq_entries; index++) sq_array[index] = index; } ring->features = p->features; ring->flags = p->flags; ring->enter_ring_fd = fd; if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY) { ring->ring_fd = -1; ring->int_flags |= INT_FLAG_REG_RING | INT_FLAG_REG_REG_RING; } else { ring->ring_fd = fd; } return ret; } static int io_uring_queue_init_try_nosqarr(unsigned entries, struct io_uring *ring, struct io_uring_params *p, void *buf, size_t buf_size) { unsigned flags = p->flags; int ret; p->flags |= IORING_SETUP_NO_SQARRAY; ret = __io_uring_queue_init_params(entries, ring, p, buf, buf_size); /* don't fallback if explicitly asked for NOSQARRAY */ if (ret != -EINVAL || (flags & IORING_SETUP_NO_SQARRAY)) return ret; p->flags = flags; return __io_uring_queue_init_params(entries, ring, p, buf, buf_size); } /* * Like io_uring_queue_init_params(), except it allows the application to pass * in a pre-allocated memory range that is used for the shared data between * the kernel and the application. This includes the sqes array, and the two * rings. The memory must be contiguous, the use case here is that the app * allocates a huge page and passes it in. 
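 *
 * Hypothetical usage sketch (buffer size and ring depth are the
 * caller's choice):
 *
 *	void *buf = mmap(NULL, 2*1024*1024, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
 *	struct io_uring_params p = { .flags = IORING_SETUP_NO_MMAP, };
 *	int used = io_uring_queue_init_mem(8, &ring, &p, buf, 2*1024*1024);
 *	// on success, 'used' bytes of 'buf' are consumed; the remainder
 *	// can host another ring starting at buf + used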
* * Returns the number of bytes used in the buffer; the app can then reuse * the buffer with the returned offset to put more rings in the same huge * page. Returns -ENOMEM if there's not enough room left in the buffer to * host the ring. */ int io_uring_queue_init_mem(unsigned entries, struct io_uring *ring, struct io_uring_params *p, void *buf, size_t buf_size) { /* should already be set... */ p->flags |= IORING_SETUP_NO_MMAP; return io_uring_queue_init_try_nosqarr(entries, ring, p, buf, buf_size); } int io_uring_queue_init_params(unsigned entries, struct io_uring *ring, struct io_uring_params *p) { int ret; ret = io_uring_queue_init_try_nosqarr(entries, ring, p, NULL, 0); return ret >= 0 ? 0 : ret; } /* * Returns -errno on error, or zero on success. On success, 'ring' * contains the necessary information to read/write to the rings. */ __cold int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags) { struct io_uring_params p; memset(&p, 0, sizeof(p)); p.flags = flags; return io_uring_queue_init_params(entries, ring, &p); } __cold void io_uring_queue_exit(struct io_uring *ring) { struct io_uring_sq *sq = &ring->sq; struct io_uring_cq *cq = &ring->cq; size_t sqe_size; if (!sq->ring_sz) { sqe_size = sizeof(struct io_uring_sqe); if (ring->flags & IORING_SETUP_SQE128) sqe_size += 64; __sys_munmap(sq->sqes, sqe_size * sq->ring_entries); io_uring_unmap_rings(sq, cq); } else { if (!(ring->int_flags & INT_FLAG_APP_MEM)) { __sys_munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe)); io_uring_unmap_rings(sq, cq); } } /* * Not strictly required, but frees up the slot we used now rather * than at process exit time. */ if (ring->int_flags & INT_FLAG_REG_RING) io_uring_unregister_ring_fd(ring); if (ring->ring_fd != -1) __sys_close(ring->ring_fd); } __cold struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring) { struct io_uring_probe *probe; size_t len; int r; len = sizeof(*probe) + 256 * sizeof(struct io_uring_probe_op); probe = malloc(len); if (!probe) return NULL; memset(probe, 0, len); r = io_uring_register_probe(ring, probe, 256); if (r >= 0) return probe; free(probe); return NULL; } __cold struct io_uring_probe *io_uring_get_probe(void) { struct io_uring ring; struct io_uring_probe *probe; int r; r = io_uring_queue_init(2, &ring, 0); if (r < 0) return NULL; probe = io_uring_get_probe_ring(&ring); io_uring_queue_exit(&ring); return probe; } __cold void io_uring_free_probe(struct io_uring_probe *probe) { free(probe); } static size_t npages(size_t size, long page_size) { size--; size /= page_size; return __fls((int) size); } #define KRING_SIZE 320 static size_t rings_size(struct io_uring_params *p, unsigned entries, unsigned cq_entries, long page_size) { size_t pages, sq_size, cq_size; cq_size = sizeof(struct io_uring_cqe); if (p->flags & IORING_SETUP_CQE32) cq_size += sizeof(struct io_uring_cqe); cq_size *= cq_entries; cq_size += KRING_SIZE; cq_size = (cq_size + 63) & ~63UL; pages = (size_t) 1 << npages(cq_size, page_size); sq_size = sizeof(struct io_uring_sqe); if (p->flags & IORING_SETUP_SQE128) sq_size += 64; sq_size *= entries; pages += (size_t) 1 << npages(sq_size, page_size); return pages * page_size; } /* * Return the ulimit -l memlock memory required for a given ring * setup, in bytes. May return -errno on error. On newer (5.12+) kernels, * io_uring no longer requires any memlock memory, and hence this function * will return 0 for that case. 
On older (5.11 and prior) kernels, this will * return the required memory so that the caller can ensure that enough space * is available before setting up a ring with the specified parameters. */ __cold ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p) { struct io_uring_params lp; struct io_uring ring; unsigned cq_entries, sq; long page_size; ssize_t ret; int cret; memset(&lp, 0, sizeof(lp)); /* * We only really use this inited ring to see if the kernel is newer * or not. Newer kernels don't require memlocked memory. If we fail, * it's most likely because it's an older kernel and we have no * available memlock space. Just continue on, lp.features will still * be zeroed at this point and we'll do the right thing. */ ret = io_uring_queue_init_params(entries, &ring, &lp); if (!ret) io_uring_queue_exit(&ring); /* * Native workers imply using cgroup memory accounting, and hence no * memlock memory is needed for the ring allocations. */ if (lp.features & IORING_FEAT_NATIVE_WORKERS) return 0; if (!entries) return -EINVAL; if (entries > KERN_MAX_ENTRIES) { if (!(p->flags & IORING_SETUP_CLAMP)) return -EINVAL; entries = KERN_MAX_ENTRIES; } cret = get_sq_cq_entries(entries, p, &sq, &cq_entries); if (cret) return cret; page_size = get_page_size(); return rings_size(p, sq, cq_entries, page_size); } /* * Return required ulimit -l memory space for a given ring setup. See * @io_uring_mlock_size_params(). */ __cold ssize_t io_uring_mlock_size(unsigned entries, unsigned flags) { struct io_uring_params p; memset(&p, 0, sizeof(p)); p.flags = flags; return io_uring_mlock_size_params(entries, &p); } #if defined(__hppa__) static struct io_uring_buf_ring *br_setup(struct io_uring *ring, unsigned int nentries, int bgid, unsigned int flags, int *ret) { struct io_uring_buf_ring *br; struct io_uring_buf_reg reg; size_t ring_size; off_t off; int lret; memset(®, 0, sizeof(reg)); reg.ring_entries = nentries; reg.bgid = bgid; reg.flags = IOU_PBUF_RING_MMAP; *ret = 0; lret = io_uring_register_buf_ring(ring, ®, flags); if (lret) { *ret = lret; return NULL; } off = IORING_OFF_PBUF_RING | (unsigned long long) bgid << IORING_OFF_PBUF_SHIFT; ring_size = nentries * sizeof(struct io_uring_buf); br = __sys_mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, ring->ring_fd, off); if (IS_ERR(br)) { *ret = PTR_ERR(br); return NULL; } return br; } #else static struct io_uring_buf_ring *br_setup(struct io_uring *ring, unsigned int nentries, int bgid, unsigned int flags, int *ret) { struct io_uring_buf_ring *br; struct io_uring_buf_reg reg; size_t ring_size; int lret; memset(®, 0, sizeof(reg)); ring_size = nentries * sizeof(struct io_uring_buf); br = __sys_mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (IS_ERR(br)) { *ret = PTR_ERR(br); return NULL; } reg.ring_addr = (unsigned long) (uintptr_t) br; reg.ring_entries = nentries; reg.bgid = bgid; *ret = 0; lret = io_uring_register_buf_ring(ring, ®, flags); if (lret) { __sys_munmap(br, ring_size); *ret = lret; br = NULL; } return br; } #endif struct io_uring_buf_ring *io_uring_setup_buf_ring(struct io_uring *ring, unsigned int nentries, int bgid, unsigned int flags, int *ret) { struct io_uring_buf_ring *br; br = br_setup(ring, nentries, bgid, flags, ret); if (br) io_uring_buf_ring_init(br); return br; } int io_uring_free_buf_ring(struct io_uring *ring, struct io_uring_buf_ring *br, unsigned int nentries, int bgid) { int ret; ret = io_uring_unregister_buf_ring(ring, bgid); if (ret) return ret; 
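/* kernel registration is gone; now drop the ring mapping created by br_setup() */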
__sys_munmap(br, nentries * sizeof(struct io_uring_buf)); return 0; } liburing-2.6/src/setup.h000066400000000000000000000003521461424365000153010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_SETUP_H #define LIBURING_SETUP_H int __io_uring_queue_init_params(unsigned entries, struct io_uring *ring, struct io_uring_params *p, void *buf, size_t buf_size); #endif liburing-2.6/src/syscall.c000066400000000000000000000014441461424365000156110ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include "syscall.h" #include int io_uring_enter(unsigned int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig) { return __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig); } int io_uring_enter2(unsigned int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig, size_t sz) { return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig, sz); } int io_uring_setup(unsigned int entries, struct io_uring_params *p) { return __sys_io_uring_setup(entries, p); } int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg, unsigned int nr_args) { return __sys_io_uring_register(fd, opcode, arg, nr_args); } liburing-2.6/src/syscall.h000066400000000000000000000021561461424365000156170ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #ifndef LIBURING_SYSCALL_H #define LIBURING_SYSCALL_H #include #include #include #include #include #include #include #include #include /* * Don't put this below the #include "arch/$arch/syscall.h", that * file may need it. */ struct io_uring_params; static inline void *ERR_PTR(intptr_t n) { return (void *) n; } static inline int PTR_ERR(const void *ptr) { return (int) (intptr_t) ptr; } static inline bool IS_ERR(const void *ptr) { return uring_unlikely((uintptr_t) ptr >= (uintptr_t) -4095UL); } #if defined(__x86_64__) || defined(__i386__) #include "arch/x86/syscall.h" #elif defined(__aarch64__) #include "arch/aarch64/syscall.h" #elif defined(__riscv) && __riscv_xlen == 64 #include "arch/riscv64/syscall.h" #else /* * We don't have native syscall wrappers * for this arch. Must use libc! */ #ifdef CONFIG_NOLIBC #error "This arch doesn't support building liburing without libc" #endif /* libc syscall wrappers. */ #include "arch/generic/syscall.h" #endif #endif liburing-2.6/src/version.c000066400000000000000000000006361461424365000156260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include "liburing.h" #include "liburing/io_uring_version.h" int io_uring_major_version(void) { return IO_URING_VERSION_MAJOR; } int io_uring_minor_version(void) { return IO_URING_VERSION_MINOR; } bool io_uring_check_version(int major, int minor) { return major > io_uring_major_version() || (major == io_uring_major_version() && minor > io_uring_minor_version()); } liburing-2.6/test/000077500000000000000000000000001461424365000141605ustar00rootroot00000000000000liburing-2.6/test/232c93d07b74.c000066400000000000000000000137231461424365000160070ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test case for socket read/write through IORING_OP_READV and * IORING_OP_WRITEV, using both TCP and UNIX sockets and blocking and * non-blocking IO. 
* * Heavily based on a test case from Hrvoje Zeba */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define RECV_BUFF_SIZE 2 #define SEND_BUFF_SIZE 3 struct params { int tcp; int non_blocking; __be16 bind_port; }; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static int rcv_ready = 0; static void set_rcv_ready(void) { pthread_mutex_lock(&mutex); rcv_ready = 1; pthread_cond_signal(&cond); pthread_mutex_unlock(&mutex); } static void wait_for_rcv_ready(void) { pthread_mutex_lock(&mutex); while (!rcv_ready) pthread_cond_wait(&cond, &mutex); pthread_mutex_unlock(&mutex); } static void *rcv(void *arg) { struct params *p = arg; int s0; int res; if (p->tcp) { int ret, val = 1; s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); res = setsockopt(s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); assert(res != -1); res = setsockopt(s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); assert(res != -1); struct sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); ret = t_bind_ephemeral_port(s0, &addr); assert(!ret); p->bind_port = addr.sin_port; } else { s0 = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); assert(s0 != -1); struct sockaddr_un addr; memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; memcpy(addr.sun_path, "\0sock", 6); res = bind(s0, (struct sockaddr *) &addr, sizeof(addr)); assert(res != -1); } res = listen(s0, 128); assert(res != -1); set_rcv_ready(); int s1 = accept(s0, NULL, NULL); assert(s1 != -1); if (p->non_blocking) { int flags = fcntl(s1, F_GETFL, 0); assert(flags != -1); flags |= O_NONBLOCK; res = fcntl(s1, F_SETFL, flags); assert(res != -1); } struct io_uring m_io_uring; void *ret = NULL; res = io_uring_queue_init(32, &m_io_uring, 0); assert(res >= 0); int bytes_read = 0; int expected_byte = 0; int done = 0; while (!done && bytes_read != 33) { char buff[RECV_BUFF_SIZE]; struct iovec iov; iov.iov_base = buff; iov.iov_len = sizeof(buff); struct io_uring_sqe *sqe = io_uring_get_sqe(&m_io_uring); assert(sqe != NULL); io_uring_prep_readv(sqe, s1, &iov, 1, 0); res = io_uring_submit(&m_io_uring); assert(res != -1); struct io_uring_cqe *cqe; unsigned head; unsigned count = 0; while (!done && count != 1) { io_uring_for_each_cqe(&m_io_uring, head, cqe) { if (cqe->res < 0) assert(cqe->res == -EAGAIN); else { int i; for (i = 0; i < cqe->res; i++) { if (buff[i] != expected_byte) { fprintf(stderr, "Received %d, wanted %d\n", buff[i], expected_byte); ret++; done = 1; } expected_byte++; } bytes_read += cqe->res; } count++; } assert(count <= 1); io_uring_cq_advance(&m_io_uring, count); } } shutdown(s1, SHUT_RDWR); close(s1); close(s0); io_uring_queue_exit(&m_io_uring); return ret; } static void *snd(void *arg) { struct params *p = arg; int s0; int ret; wait_for_rcv_ready(); if (p->tcp) { int val = 1; s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); ret = setsockopt(s0, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); assert(ret != -1); struct sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_port = p->bind_port; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); ret = connect(s0, (struct sockaddr*) &addr, sizeof(addr)); assert(ret != -1); } else { s0 = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); assert(s0 != -1); struct sockaddr_un addr; memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; memcpy(addr.sun_path, "\0sock", 6); ret = connect(s0, 
(struct sockaddr*) &addr, sizeof(addr)); assert(ret != -1); } if (p->non_blocking) { int flags = fcntl(s0, F_GETFL, 0); assert(flags != -1); flags |= O_NONBLOCK; ret = fcntl(s0, F_SETFL, flags); assert(ret != -1); } struct io_uring m_io_uring; ret = io_uring_queue_init(32, &m_io_uring, 0); assert(ret >= 0); int bytes_written = 0; int done = 0; while (!done && bytes_written != 33) { char buff[SEND_BUFF_SIZE]; int i; for (i = 0; i < SEND_BUFF_SIZE; i++) buff[i] = i + bytes_written; struct iovec iov; iov.iov_base = buff; iov.iov_len = sizeof(buff); struct io_uring_sqe *sqe = io_uring_get_sqe(&m_io_uring); assert(sqe != NULL); io_uring_prep_writev(sqe, s0, &iov, 1, 0); ret = io_uring_submit(&m_io_uring); assert(ret != -1); struct io_uring_cqe *cqe; unsigned head; unsigned count = 0; while (!done && count != 1) { io_uring_for_each_cqe(&m_io_uring, head, cqe) { if (cqe->res < 0) { if (cqe->res == -EPIPE) { done = 1; break; } assert(cqe->res == -EAGAIN); } else { bytes_written += cqe->res; } count++; } assert(count <= 1); io_uring_cq_advance(&m_io_uring, count); } usleep(100000); } shutdown(s0, SHUT_RDWR); close(s0); io_uring_queue_exit(&m_io_uring); return NULL; } int main(int argc, char *argv[]) { struct params p; pthread_t t1, t2; void *res1, *res2; int i, exit_val = T_EXIT_PASS; if (argc > 1) return T_EXIT_SKIP; for (i = 0; i < 4; i++) { p.tcp = i & 1; p.non_blocking = (i & 2) >> 1; rcv_ready = 0; pthread_create(&t1, NULL, rcv, &p); pthread_create(&t2, NULL, snd, &p); pthread_join(t1, &res1); pthread_join(t2, &res2); if (res1 || res2) { fprintf(stderr, "Failed tcp=%d, non_blocking=%d\n", p.tcp, p.non_blocking); exit_val = T_EXIT_FAIL; } } return exit_val; } liburing-2.6/test/35fa71a030ca.c000066400000000000000000000161571461424365000161340ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ // autogenerated by syzkaller (https://github.com/google/syzkaller) #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" #if !defined(SYS_futex) && defined(SYS_futex_time64) # define SYS_futex SYS_futex_time64 #endif static void sleep_ms(uint64_t ms) { usleep(ms * 1000); } static uint64_t current_time_ms(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts)) exit(1); return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; } static void thread_start(void* (*fn)(void*), void* arg) { pthread_t th; pthread_attr_t attr; pthread_attr_init(&attr); pthread_attr_setstacksize(&attr, 128 << 10); int i; for (i = 0; i < 100; i++) { if (pthread_create(&th, &attr, fn, arg) == 0) { pthread_attr_destroy(&attr); return; } if (errno == EAGAIN) { usleep(50); continue; } break; } exit(1); } typedef struct { int state; } event_t; static void event_init(event_t* ev) { ev->state = 0; } static void event_reset(event_t* ev) { ev->state = 0; } static void event_set(event_t* ev) { if (ev->state) exit(1); __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE); syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG); } static void event_wait(event_t* ev) { while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0); } static int event_isset(event_t* ev) { return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE); } static int event_timedwait(event_t* ev, uint64_t timeout) { uint64_t start = current_time_ms(); uint64_t now = start; for 
(;;) { uint64_t remain = timeout - (now - start); struct timespec ts; ts.tv_sec = remain / 1000; ts.tv_nsec = (remain % 1000) * 1000 * 1000; syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts); if (__atomic_load_n(&ev->state, __ATOMIC_RELAXED)) return 1; now = current_time_ms(); if (now - start > timeout) return 0; } } static bool write_file(const char* file, const char* what, ...) { char buf[1024]; va_list args; va_start(args, what); vsnprintf(buf, sizeof(buf), what, args); va_end(args); buf[sizeof(buf) - 1] = 0; int len = strlen(buf); int fd = open(file, O_WRONLY | O_CLOEXEC); if (fd == -1) return false; if (write(fd, buf, len) != len) { int err = errno; close(fd); errno = err; return false; } close(fd); return true; } static void kill_and_wait(int pid, int* status) { kill(-pid, SIGKILL); kill(pid, SIGKILL); int i; for (i = 0; i < 100; i++) { if (waitpid(-1, status, WNOHANG | __WALL) == pid) return; usleep(1000); } DIR* dir = opendir("/sys/fs/fuse/connections"); if (dir) { for (;;) { struct dirent* ent = readdir(dir); if (!ent) break; if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; char abort[300]; snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name); int fd = open(abort, O_WRONLY); if (fd == -1) { continue; } if (write(fd, abort, 1) < 0) { } close(fd); } closedir(dir); } else { } while (waitpid(-1, status, __WALL) != pid) { } } #define SYZ_HAVE_SETUP_TEST 1 static void setup_test(void) { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); write_file("/proc/self/oom_score_adj", "1000"); } struct thread_t { int created, call; event_t ready, done; }; static struct thread_t threads[16]; static void execute_call(int call); static int running; static void* thr(void* arg) { struct thread_t* th = (struct thread_t*)arg; for (;;) { event_wait(&th->ready); event_reset(&th->ready); execute_call(th->call); __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED); event_set(&th->done); } return 0; } static void execute_one(void) { int i, call, thread; for (call = 0; call < 3; call++) { for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0])); thread++) { struct thread_t* th = &threads[thread]; if (!th->created) { th->created = 1; event_init(&th->ready); event_init(&th->done); event_set(&th->done); thread_start(thr, th); } if (!event_isset(&th->done)) continue; event_reset(&th->done); th->call = call; __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED); event_set(&th->ready); event_timedwait(&th->done, 45); break; } } for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++) sleep_ms(1); } static void execute_one(void); #define WAIT_FLAGS __WALL static void loop(void) { for (;;) { int pid = fork(); if (pid < 0) exit(1); if (pid == 0) { setup_test(); execute_one(); exit(0); } int status = 0; uint64_t start = current_time_ms(); for (;;) { if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) break; sleep_ms(1); if (current_time_ms() - start < 5 * 1000) continue; kill_and_wait(pid, &status); break; } } } static uint64_t r[1] = {0xffffffffffffffff}; void execute_call(int call) { long res; switch (call) { case 0: *(uint32_t*)0x20000040 = 0; *(uint32_t*)0x20000044 = 0; *(uint32_t*)0x20000048 = 0; *(uint32_t*)0x2000004c = 0; *(uint32_t*)0x20000050 = 0; *(uint32_t*)0x20000054 = 0; *(uint32_t*)0x20000058 = 0; *(uint32_t*)0x2000005c = 0; *(uint32_t*)0x20000060 = 0; *(uint32_t*)0x20000064 = 0; *(uint32_t*)0x20000068 = 0; *(uint32_t*)0x2000006c = 0; *(uint32_t*)0x20000070 = 0; *(uint32_t*)0x20000074 = 0; 
*(uint32_t*)0x20000078 = 0; *(uint32_t*)0x2000007c = 0; *(uint32_t*)0x20000080 = 0; *(uint32_t*)0x20000084 = 0; *(uint64_t*)0x20000088 = 0; *(uint32_t*)0x20000090 = 0; *(uint32_t*)0x20000094 = 0; *(uint32_t*)0x20000098 = 0; *(uint32_t*)0x2000009c = 0; *(uint32_t*)0x200000a0 = 0; *(uint32_t*)0x200000a4 = 0; *(uint32_t*)0x200000a8 = 0; *(uint32_t*)0x200000ac = 0; *(uint64_t*)0x200000b0 = 0; res = __sys_io_uring_setup(0x64, (struct io_uring_params *) 0x20000040UL); if (res != -1) r[0] = res; break; case 1: __sys_io_uring_register((long)r[0], 0, 0, 0); break; case 2: __sys_io_uring_register((long)r[0], 0, 0, 0); break; } } static void sig_int(int sig) { exit(0); } int main(int argc, char *argv[]) { if (argc > 1) return T_EXIT_SKIP; signal(SIGINT, sig_int); mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0); signal(SIGALRM, sig_int); alarm(5); loop(); return T_EXIT_PASS; } liburing-2.6/test/500f9fbadef8.c000066400000000000000000000033301461424365000163060ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Single depth submit+wait poll hang test * */ #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define BLOCKS 4096 int main(int argc, char *argv[]) { struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec iov; char buf[32]; off_t offset; unsigned blocks; int ret, fd; if (argc > 1) return T_EXIT_SKIP; t_posix_memalign(&iov.iov_base, 4096, 4096); iov.iov_len = 4096; ret = io_uring_queue_init(1, &ring, IORING_SETUP_IOPOLL); if (ret) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } sprintf(buf, "./XXXXXX"); fd = mkostemp(buf, O_WRONLY | O_DIRECT | O_CREAT); if (fd < 0) { if (errno == EINVAL) return T_EXIT_SKIP; perror("mkostemp"); return T_EXIT_FAIL; } offset = 0; blocks = BLOCKS; do { sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_writev(sqe, fd, &iov, 1, offset); ret = io_uring_submit_and_wait(&ring, 1); if (ret < 0) { fprintf(stderr, "submit_and_wait: %d\n", ret); goto err; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion: %d\n", ret); goto err; } if (cqe->res != 4096) { if (cqe->res == -EOPNOTSUPP) goto skipped; goto err; } io_uring_cqe_seen(&ring, cqe); offset += 4096; } while (--blocks); close(fd); unlink(buf); return T_EXIT_PASS; err: close(fd); unlink(buf); return T_EXIT_FAIL; skipped: fprintf(stderr, "Polling not supported in current dir, test skipped\n"); close(fd); unlink(buf); return T_EXIT_SKIP; } liburing-2.6/test/7ad0e4b2f83c.c000066400000000000000000000036431461424365000162260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include "liburing.h" #include "helpers.h" static unsigned long long mtime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000; usec /= 1000; return sec + usec; } static unsigned long long mtime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return mtime_since(tv, &end); } int main(int argc, char *argv[]) { struct __kernel_timespec ts1, ts2; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring ring; unsigned long msec; struct timeval tv; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(32, &ring, 0); if (ret) { fprintf(stderr, "io_uring_queue_init=%d\n", ret); return 
T_EXIT_FAIL;
	}

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	ret = io_uring_submit(&ring);
	if (ret != 1) {
		fprintf(stderr, "io_uring_submit1=%d\n", ret);
		return T_EXIT_FAIL;
	}

	ts1.tv_sec = 5, ts1.tv_nsec = 0;
	ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts1);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe_timeout=%d\n", ret);
		return T_EXIT_FAIL;
	}
	io_uring_cqe_seen(&ring, cqe);

	gettimeofday(&tv, NULL);
	ts2.tv_sec = 1;
	ts2.tv_nsec = 0;
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_timeout(sqe, &ts2, 0, 0);
	sqe->user_data = 89;
	ret = io_uring_submit(&ring);
	if (ret != 1) {
		fprintf(stderr, "io_uring_submit2=%d\n", ret);
		return T_EXIT_FAIL;
	}

	io_uring_wait_cqe(&ring, &cqe);
	io_uring_cqe_seen(&ring, cqe);
	msec = mtime_since_now(&tv);
	if (msec >= 900 && msec <= 1100) {
		io_uring_queue_exit(&ring);
		return T_EXIT_PASS;
	}

	fprintf(stderr, "%s: Timeout seems wonky (got %lu)\n", __FUNCTION__,
		msec);
	io_uring_queue_exit(&ring);
	return T_EXIT_FAIL;
}

liburing-2.6/test/8a9973408177.c
/* SPDX-License-Identifier: MIT */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "liburing.h"
#include "helpers.h"

static int register_file(struct io_uring *ring)
{
	char buf[32];
	int ret, fd;

	sprintf(buf, "./XXXXXX");
	fd = mkstemp(buf);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	ret = io_uring_register_files(ring, &fd, 1);
	if (ret) {
		fprintf(stderr, "file register %d\n", ret);
		return 1;
	}

	ret = io_uring_unregister_files(ring);
	if (ret) {
		fprintf(stderr, "file unregister %d\n", ret);
		return 1;
	}

	unlink(buf);
	close(fd);
	return 0;
}

static int test_single_fsync(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	char buf[32];
	int fd, ret;

	sprintf(buf, "./XXXXXX");
	fd = mkstemp(buf);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		printf("get sqe failed\n");
		goto err;
	}

	io_uring_prep_fsync(sqe, fd, 0);
	ret = io_uring_submit(ring);
	if (ret <= 0) {
		printf("sqe submit failed: %d\n", ret);
		goto err;
	}

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0) {
		printf("wait completion %d\n", ret);
		goto err;
	}

	io_uring_cqe_seen(ring, cqe);
	unlink(buf);
	return 0;
err:
	unlink(buf);
	return 1;
}

int main(int argc, char *argv[])
{
	struct io_uring ring;
	int ret;

	if (argc > 1)
		return T_EXIT_SKIP;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret) {
		printf("ring setup failed\n");
		return T_EXIT_FAIL;
	}

	ret = register_file(&ring);
	if (ret)
		return ret;
	ret = test_single_fsync(&ring);
	if (ret) {
		printf("test_single_fsync failed\n");
		return ret;
	}

	return T_EXIT_PASS;
}

liburing-2.6/test/917257daa0fe.c
/* SPDX-License-Identifier: MIT */
// autogenerated by syzkaller (https://github.com/google/syzkaller)

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#include "liburing.h"
#include "helpers.h"
#include "../src/syscall.h"

int main(int argc, char *argv[])
{
	if (argc > 1)
		return T_EXIT_SKIP;

	mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0);

	*(uint32_t*)0x20000000 = 0;
	*(uint32_t*)0x20000004 = 0;
	*(uint32_t*)0x20000008 = 6;
	*(uint32_t*)0x2000000c = 0;
	*(uint32_t*)0x20000010 = 0x3af;
	*(uint32_t*)0x20000014 = 0;
	*(uint32_t*)0x20000018 = 0;
	*(uint32_t*)0x2000001c = 0;
	*(uint32_t*)0x20000020 = 0;
	*(uint32_t*)0x20000024 = 0;
	*(uint32_t*)0x20000028 = 0;
	*(uint32_t*)0x2000002c = 0;
	*(uint32_t*)0x20000030 = 0;
	*(uint32_t*)0x20000034 = 0;
	*(uint32_t*)0x20000038 = 0;
	*(uint32_t*)0x2000003c = 0;
*(uint32_t*)0x20000040 = 0; *(uint32_t*)0x20000044 = 0; *(uint64_t*)0x20000048 = 0; *(uint32_t*)0x20000050 = 0; *(uint32_t*)0x20000054 = 0; *(uint32_t*)0x20000058 = 0; *(uint32_t*)0x2000005c = 0; *(uint32_t*)0x20000060 = 0; *(uint32_t*)0x20000064 = 0; *(uint32_t*)0x20000068 = 0; *(uint32_t*)0x2000006c = 0; *(uint64_t*)0x20000070 = 0; __sys_io_uring_setup(0x7a6, (struct io_uring_params *) 0x20000000UL); return T_EXIT_PASS; } liburing-2.6/test/Makefile000066400000000000000000000127501461424365000156250ustar00rootroot00000000000000prefix ?= /usr datadir ?= $(prefix)/share INSTALL=install ifneq ($(MAKECMDGOALS),clean) include ../config-host.mak endif CPPFLAGS ?= override CPPFLAGS += \ -D_GNU_SOURCE \ -D__SANE_USERSPACE_TYPES__ \ -I../src/include/ \ -include ../config-host.h \ -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 CFLAGS ?= -g -O3 -Wall -Wextra XCFLAGS = -Wno-unused-parameter -Wno-sign-compare ifdef CONFIG_HAVE_STRINGOP_OVERFLOW XCFLAGS += -Wstringop-overflow=0 endif ifdef CONFIG_HAVE_ARRAY_BOUNDS XCFLAGS += -Warray-bounds=0 endif CXXFLAGS ?= $(CFLAGS) override CFLAGS += $(XCFLAGS) -DLIBURING_BUILD_TEST override CXXFLAGS += $(XCFLAGS) -std=c++11 -DLIBURING_BUILD_TEST LDFLAGS ?= override LDFLAGS += -L../src/ -luring -lpthread # Please keep this list sorted alphabetically. test_srcs := \ 232c93d07b74.c \ 35fa71a030ca.c \ 500f9fbadef8.c \ 7ad0e4b2f83c.c \ 8a9973408177.c \ 917257daa0fe.c \ a0908ae19763.c \ a4c0b3decb33.c \ accept.c \ accept-link.c \ accept-reuse.c \ accept-test.c \ across-fork.c \ b19062a56726.c \ b5837bd5311d.c \ buf-ring.c \ buf-ring-nommap.c \ buf-ring-put.c \ ce593a6c480a.c \ close-opath.c \ connect.c \ connect-rep.c \ coredump.c \ cq-full.c \ cq-overflow.c \ cq-peek-batch.c \ cq-ready.c \ cq-size.c \ d4ae271dfaae.c \ d77a67ed5f27.c \ defer.c \ defer-taskrun.c \ defer-tw-timeout.c \ double-poll-crash.c \ drop-submit.c \ eeed8b54e0df.c \ empty-eownerdead.c \ eploop.c \ eventfd.c \ eventfd-disable.c \ eventfd-reg.c \ eventfd-ring.c \ evloop.c \ exec-target.c \ exit-no-cleanup.c \ fadvise.c \ fallocate.c \ fc2a85cb02ef.c \ fd-install.c \ fd-pass.c \ file-register.c \ files-exit-hang-poll.c \ files-exit-hang-timeout.c \ file-update.c \ file-verify.c \ fixed-buf-iter.c \ fixed-buf-merge.c \ fixed-link.c \ fixed-reuse.c \ fpos.c \ fsnotify.c \ fsync.c \ futex.c \ hardlink.c \ io-cancel.c \ iopoll.c \ iopoll-leak.c \ iopoll-overflow.c \ io_uring_enter.c \ io_uring_passthrough.c \ io_uring_register.c \ io_uring_setup.c \ lfs-openat.c \ lfs-openat-write.c \ link.c \ link_drain.c \ link-timeout.c \ madvise.c \ mkdir.c \ msg-ring.c \ msg-ring-fd.c \ msg-ring-flags.c \ msg-ring-overflow.c \ multicqes_drain.c \ no-mmap-inval.c \ nolibc.c \ nop-all-sizes.c \ nop.c \ openat2.c \ open-close.c \ open-direct-link.c \ open-direct-pick.c \ personality.c \ pipe-bug.c \ pipe-eof.c \ pipe-reuse.c \ poll.c \ poll-cancel.c \ poll-cancel-all.c \ poll-cancel-ton.c \ poll-link.c \ poll-many.c \ poll-mshot-overflow.c \ poll-mshot-update.c \ poll-race.c \ poll-race-mshot.c \ poll-ring.c \ poll-v-poll.c \ probe.c \ read-before-exit.c \ read-mshot.c \ read-mshot-empty.c \ read-write.c \ recv-msgall.c \ recv-msgall-stream.c \ recv-multishot.c \ reg-fd-only.c \ reg-hint.c \ reg-reg-ring.c \ regbuf-merge.c \ register-restrictions.c \ rename.c \ ringbuf-read.c \ ringbuf-status.c \ ring-leak2.c \ ring-leak.c \ rsrc_tags.c \ rw_merge_test.c \ self.c \ send_recv.c \ send_recvmsg.c \ send-zerocopy.c \ shared-wq.c \ short-read.c \ shutdown.c \ sigfd-deadlock.c \ single-issuer.c \ skip-cqe.c \ socket.c \ 
socket-io-cmd.c \ socket-getsetsock-cmd.c \ socket-rw.c \ socket-rw-eagain.c \ socket-rw-offset.c \ splice.c \ sq-full.c \ sq-full-cpp.cc \ sqpoll-disable-exit.c \ sq-poll-dup.c \ sqpoll-exit-hang.c \ sq-poll-kthread.c \ sq-poll-share.c \ sqpoll-sleep.c \ sq-space_left.c \ stdout.c \ submit-and-wait.c \ submit-link-fail.c \ submit-reuse.c \ symlink.c \ sync-cancel.c \ teardowns.c \ thread-exit.c \ timeout.c \ timeout-new.c \ truncate.c \ tty-write-dpoll.c \ unlink.c \ version.c \ waitid.c \ wakeup-hang.c \ wq-aff.c \ xattr.c \ # EOL all_targets := include ../Makefile.quiet ifdef CONFIG_HAVE_STATX test_srcs += statx.c else ifdef CONFIG_HAVE_GLIBC_STATX test_srcs += statx.c endif all_targets += statx.t ifdef CONFIG_HAVE_CXX test_srcs += sq-full-cpp.cc endif all_targets += sq-full-cpp.t test_targets := $(patsubst %.c,%,$(test_srcs)) test_targets := $(patsubst %.cc,%,$(test_targets)) run_test_targets := $(patsubst %,%.run_test,$(test_targets)) test_targets := $(patsubst %,%.t,$(test_targets)) all_targets += $(test_targets) helpers = helpers.o all: $(test_targets) helpers.o: helpers.c $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $< LIBURING := $(shell if [ -e ../src/liburing.a ]; then echo ../src/liburing.a; fi) %.t: %.c $(helpers) helpers.h $(LIBURING) $(QUIET_CC)$(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< $(helpers) $(LDFLAGS) # # Clang++ is not happy with -Wmissing-prototypes: # # cc1plus: warning: command-line option '-Wmissing-prototypes' \ # is valid for C/ObjC but not for C++ # %.t: %.cc $(helpers) helpers.h $(LIBURING) $(QUIET_CXX)$(CXX) \ $(patsubst -Wmissing-prototypes,,$(CPPFLAGS)) \ $(patsubst -Wmissing-prototypes,,$(CXXFLAGS)) \ -o $@ $< $(helpers) $(LDFLAGS) install: $(test_targets) runtests.sh runtests-loop.sh $(INSTALL) -D -d -m 755 $(datadir)/liburing-test/ $(INSTALL) -D -m 755 $(test_targets) $(datadir)/liburing-test/ $(INSTALL) -D -m 755 runtests.sh $(datadir)/liburing-test/ $(INSTALL) -D -m 755 runtests-loop.sh $(datadir)/liburing-test/ uninstall: @rm -rf $(datadir)/liburing-test/ clean: @rm -f $(all_targets) helpers.o output/* @rm -rf output/ runtests: all @./runtests.sh $(test_targets) runtests-loop: all @./runtests-loop.sh $(test_targets) %.run_test: %.t @./runtests-quiet.sh $< runtests-parallel: $(run_test_targets) @echo "All tests passed" .PHONY: all install clean runtests runtests-loop runtests-parallel liburing-2.6/test/a0908ae19763.c000066400000000000000000000031271461424365000160100ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ // autogenerated by syzkaller (https://github.com/google/syzkaller) #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" static uint64_t r[1] = {0xffffffffffffffff}; int main(int argc, char *argv[]) { if (argc > 1) return T_EXIT_SKIP; mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0); intptr_t res = 0; *(uint32_t*)0x20000080 = 0; *(uint32_t*)0x20000084 = 0; *(uint32_t*)0x20000088 = 0; *(uint32_t*)0x2000008c = 0; *(uint32_t*)0x20000090 = 0; *(uint32_t*)0x20000094 = 0; *(uint32_t*)0x20000098 = 0; *(uint32_t*)0x2000009c = 0; *(uint32_t*)0x200000a0 = 0; *(uint32_t*)0x200000a4 = 0; *(uint32_t*)0x200000a8 = 0; *(uint32_t*)0x200000ac = 0; *(uint32_t*)0x200000b0 = 0; *(uint32_t*)0x200000b4 = 0; *(uint32_t*)0x200000b8 = 0; *(uint32_t*)0x200000bc = 0; *(uint32_t*)0x200000c0 = 0; *(uint32_t*)0x200000c4 = 0; *(uint64_t*)0x200000c8 = 0; *(uint32_t*)0x200000d0 = 0; *(uint32_t*)0x200000d4 = 0; *(uint32_t*)0x200000d8 = 0; 
*(uint32_t*)0x200000dc = 0; *(uint32_t*)0x200000e0 = 0; *(uint32_t*)0x200000e4 = 0; *(uint32_t*)0x200000e8 = 0; *(uint32_t*)0x200000ec = 0; *(uint64_t*)0x200000f0 = 0; res = __sys_io_uring_setup(0xa4, (struct io_uring_params *) 0x20000080); if (res != -1) r[0] = res; *(uint32_t*)0x20000280 = -1; __sys_io_uring_register(r[0], 2, (const void *) 0x20000280, 1); return T_EXIT_PASS; } liburing-2.6/test/a4c0b3decb33.c000066400000000000000000000073161461424365000162730ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ // autogenerated by syzkaller (https://github.com/google/syzkaller) #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" static void sleep_ms(uint64_t ms) { usleep(ms * 1000); } static uint64_t current_time_ms(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts)) exit(1); return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; } static bool write_file(const char* file, const char* what, ...) { char buf[1024]; va_list args; va_start(args, what); vsnprintf(buf, sizeof(buf), what, args); va_end(args); buf[sizeof(buf) - 1] = 0; int len = strlen(buf); int fd = open(file, O_WRONLY | O_CLOEXEC); if (fd == -1) return false; if (write(fd, buf, len) != len) { int err = errno; close(fd); errno = err; return false; } close(fd); return true; } static void kill_and_wait(int pid, int* status) { kill(-pid, SIGKILL); kill(pid, SIGKILL); int i; for (i = 0; i < 100; i++) { if (waitpid(-1, status, WNOHANG | __WALL) == pid) return; usleep(1000); } DIR* dir = opendir("/sys/fs/fuse/connections"); if (dir) { for (;;) { struct dirent* ent = readdir(dir); if (!ent) break; if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; char abort[300]; snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name); int fd = open(abort, O_WRONLY); if (fd == -1) { continue; } if (write(fd, abort, 1) < 0) { } close(fd); } closedir(dir); } else { } while (waitpid(-1, status, __WALL) != pid) { } } static void setup_test(void) { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); write_file("/proc/self/oom_score_adj", "1000"); } static void execute_one(void); #define WAIT_FLAGS __WALL static void loop(void) { int iter; for (iter = 0; iter < 50; iter++) { int pid = fork(); if (pid < 0) exit(1); if (pid == 0) { setup_test(); execute_one(); exit(0); } int status = 0; uint64_t start = current_time_ms(); for (;;) { if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) break; sleep_ms(1); if (current_time_ms() - start < 5 * 1000) continue; kill_and_wait(pid, &status); break; } } } void execute_one(void) { *(uint32_t*)0x20000080 = 0; *(uint32_t*)0x20000084 = 0; *(uint32_t*)0x20000088 = 3; *(uint32_t*)0x2000008c = 3; *(uint32_t*)0x20000090 = 0x175; *(uint32_t*)0x20000094 = 0; *(uint32_t*)0x20000098 = 0; *(uint32_t*)0x2000009c = 0; *(uint32_t*)0x200000a0 = 0; *(uint32_t*)0x200000a4 = 0; *(uint32_t*)0x200000a8 = 0; *(uint32_t*)0x200000ac = 0; *(uint32_t*)0x200000b0 = 0; *(uint32_t*)0x200000b4 = 0; *(uint32_t*)0x200000b8 = 0; *(uint32_t*)0x200000bc = 0; *(uint32_t*)0x200000c0 = 0; *(uint32_t*)0x200000c4 = 0; *(uint64_t*)0x200000c8 = 0; *(uint32_t*)0x200000d0 = 0; *(uint32_t*)0x200000d4 = 0; *(uint32_t*)0x200000d8 = 0; *(uint32_t*)0x200000dc = 0; *(uint32_t*)0x200000e0 = 0; *(uint32_t*)0x200000e4 = 0; *(uint32_t*)0x200000e8 = 0; *(uint32_t*)0x200000ec = 0; 
*(uint64_t*)0x200000f0 = 0; __sys_io_uring_setup(0x983, (struct io_uring_params *) 0x20000080); } static void sig_int(int sig) { exit(0); } int main(int argc, char *argv[]) { if (argc > 1) return T_EXIT_SKIP; signal(SIGINT, sig_int); mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0); loop(); return T_EXIT_PASS; } liburing-2.6/test/accept-link.c000066400000000000000000000117021461424365000165170ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static int recv_thread_ready = 0; static int recv_thread_done = 0; static void signal_var(int *var) { pthread_mutex_lock(&mutex); *var = 1; pthread_cond_signal(&cond); pthread_mutex_unlock(&mutex); } static void wait_for_var(int *var) { pthread_mutex_lock(&mutex); while (!*var) pthread_cond_wait(&cond, &mutex); pthread_mutex_unlock(&mutex); } struct data { unsigned expected[2]; unsigned just_positive[2]; unsigned long timeout; unsigned short port; unsigned int addr; int stop; }; static void *send_thread(void *arg) { struct data *data = arg; int ret; wait_for_var(&recv_thread_ready); if (data->stop) return NULL; int s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); assert(s0 != -1); struct sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_port = data->port; addr.sin_addr.s_addr = data->addr; ret = connect(s0, (struct sockaddr*)&addr, sizeof(addr)); assert(ret != -1); wait_for_var(&recv_thread_done); close(s0); return NULL; } static void *recv_thread(void *arg) { struct data *data = arg; struct io_uring ring; int i, ret; ret = io_uring_queue_init(8, &ring, 0); assert(ret == 0); int s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); assert(s0 != -1); int32_t val = 1; ret = setsockopt(s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); assert(ret != -1); ret = setsockopt(s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); assert(ret != -1); struct sockaddr_in addr; addr.sin_family = AF_INET; data->addr = inet_addr("127.0.0.1"); addr.sin_addr.s_addr = data->addr; i = 0; do { data->port = htons(1025 + (rand() % 64510)); addr.sin_port = data->port; if (bind(s0, (struct sockaddr*)&addr, sizeof(addr)) != -1) break; } while (++i < 100); if (i >= 100) { printf("Can't find good port, skipped\n"); data->stop = 1; signal_var(&recv_thread_ready); goto out; } ret = listen(s0, 128); assert(ret != -1); signal_var(&recv_thread_ready); struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(&ring); assert(sqe != NULL); io_uring_prep_accept(sqe, s0, NULL, NULL, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); assert(sqe != NULL); struct __kernel_timespec ts; ts.tv_sec = data->timeout / 1000000000; ts.tv_nsec = data->timeout % 1000000000; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 2; ret = io_uring_submit(&ring); assert(ret == 2); for (i = 0; i < 2; i++) { struct io_uring_cqe *cqe; int idx; if (io_uring_wait_cqe(&ring, &cqe)) { fprintf(stderr, "wait cqe failed\n"); goto err; } idx = cqe->user_data - 1; if (cqe->res != data->expected[idx]) { if (cqe->res > 0 && data->just_positive[idx]) goto ok; if (cqe->res == -EBADF) { fprintf(stdout, "Accept not supported, skipping\n"); data->stop = 1; goto out; } fprintf(stderr, "cqe %" PRIu64 " got %d, wanted %d\n", (uint64_t) cqe->user_data, cqe->res, data->expected[idx]); goto err; } ok: if 
(cqe->user_data == 1 && cqe->res > 0) close(cqe->res); io_uring_cqe_seen(&ring, cqe); } signal_var(&recv_thread_done); out: close(s0); return NULL; err: close(s0); return (void *) 1; } static int test_accept_timeout(int do_connect, unsigned long timeout) { struct io_uring ring; struct io_uring_params p = {}; pthread_t t1, t2; struct data d; void *tret; int ret, fast_poll; ret = io_uring_queue_init_params(1, &ring, &p); if (ret) { fprintf(stderr, "queue_init: %d\n", ret); return 1; } fast_poll = (p.features & IORING_FEAT_FAST_POLL) != 0; io_uring_queue_exit(&ring); recv_thread_ready = 0; recv_thread_done = 0; memset(&d, 0, sizeof(d)); d.timeout = timeout; if (!do_connect) { if (fast_poll) { d.expected[0] = -ECANCELED; d.expected[1] = -ETIME; } else { d.expected[0] = -EINTR; d.expected[1] = -EALREADY; } } else { d.expected[0] = -1U; d.just_positive[0] = 1; d.expected[1] = -ECANCELED; } pthread_create(&t1, NULL, recv_thread, &d); if (do_connect) pthread_create(&t2, NULL, send_thread, &d); pthread_join(t1, &tret); if (tret) ret++; if (do_connect) { pthread_join(t2, &tret); if (tret) ret++; } return ret; } int main(int argc, char *argv[]) { if (argc > 1) return T_EXIT_SKIP; if (test_accept_timeout(0, 200000000)) { fprintf(stderr, "accept timeout 0 failed\n"); return T_EXIT_FAIL; } if (test_accept_timeout(1, 1000000000)) { fprintf(stderr, "accept and connect timeout 0 failed\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/accept-reuse.c000066400000000000000000000072541461424365000167140ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" static struct io_uring io_uring; static int sys_io_uring_enter(const int fd, const unsigned to_submit, const unsigned min_complete, const unsigned flags, sigset_t * const sig) { return __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig); } static int submit_sqe(void) { struct io_uring_sq *sq = &io_uring.sq; const unsigned tail = *sq->ktail; sq->array[tail & sq->ring_mask] = 0; io_uring_smp_store_release(sq->ktail, tail + 1); return sys_io_uring_enter(io_uring.ring_fd, 1, 0, 0, NULL); } int main(int argc, char **argv) { struct addrinfo *addr_info_list = NULL; struct addrinfo *ai, *addr_info = NULL; struct io_uring_params params; struct io_uring_sqe *sqe; struct addrinfo hints; struct sockaddr sa; socklen_t sa_size = sizeof(sa); int ret, listen_fd, connect_fd, val, i; if (argc > 1) return T_EXIT_SKIP; memset(¶ms, 0, sizeof(params)); ret = t_io_uring_init_sqarray(4, &io_uring, ¶ms); if (ret) { fprintf(stderr, "io_uring_init_failed: %d\n", ret); return T_EXIT_FAIL; } if (!(params.features & IORING_FEAT_SUBMIT_STABLE)) { fprintf(stdout, "FEAT_SUBMIT_STABLE not there, skipping\n"); return T_EXIT_SKIP; } memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_PASSIVE | AI_NUMERICSERV; ret = getaddrinfo(NULL, "12345", &hints, &addr_info_list); if (ret < 0) { perror("getaddrinfo"); return T_EXIT_FAIL; } for (ai = addr_info_list; ai; ai = ai->ai_next) { if (ai->ai_family == AF_INET || ai->ai_family == AF_INET6) { addr_info = ai; break; } } if (!addr_info) { fprintf(stderr, "addrinfo not found\n"); return T_EXIT_FAIL; } sqe = &io_uring.sq.sqes[0]; listen_fd = -1; ret = socket(addr_info->ai_family, SOCK_STREAM, addr_info->ai_protocol); if (ret < 0) { perror("socket"); return T_EXIT_FAIL; } listen_fd = ret; val = 1; setsockopt(listen_fd, 
SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)); setsockopt(listen_fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(int)); ret = bind(listen_fd, addr_info->ai_addr, addr_info->ai_addrlen); if (ret < 0) { perror("bind"); return T_EXIT_FAIL; } ret = listen(listen_fd, SOMAXCONN); if (ret < 0) { perror("listen"); return T_EXIT_FAIL; } memset(&sa, 0, sizeof(sa)); io_uring_prep_accept(sqe, listen_fd, &sa, &sa_size, 0); sqe->user_data = 1; ret = submit_sqe(); if (ret != 1) { fprintf(stderr, "submit failed: %d\n", ret); return T_EXIT_FAIL; } connect_fd = -1; ret = socket(addr_info->ai_family, SOCK_STREAM, addr_info->ai_protocol); if (ret < 0) { perror("socket"); return T_EXIT_FAIL; } connect_fd = ret; io_uring_prep_connect(sqe, connect_fd, addr_info->ai_addr, addr_info->ai_addrlen); sqe->user_data = 2; ret = submit_sqe(); if (ret != 1) { fprintf(stderr, "submit failed: %d\n", ret); return T_EXIT_FAIL; } for (i = 0; i < 2; i++) { struct io_uring_cqe *cqe = NULL; ret = io_uring_wait_cqe(&io_uring, &cqe); if (ret) { fprintf(stderr, "io_uring_wait_cqe: %d\n", ret); return T_EXIT_FAIL; } switch (cqe->user_data) { case 1: if (cqe->res < 0) { fprintf(stderr, "accept failed: %d\n", cqe->res); return T_EXIT_FAIL; } break; case 2: if (cqe->res) { fprintf(stderr, "connect failed: %d\n", cqe->res); return T_EXIT_FAIL; } break; } io_uring_cq_advance(&io_uring, 1); } freeaddrinfo(addr_info_list); io_uring_queue_exit(&io_uring); return T_EXIT_PASS; } liburing-2.6/test/accept-test.c000066400000000000000000000034641461424365000165470ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Check to see if accept handles addr and addrlen */ #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; struct sockaddr_un addr; socklen_t addrlen = sizeof(addr); int ret, fd; struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 1000000 }; if (argc > 1) return T_EXIT_SKIP; if (io_uring_queue_init(4, &ring, 0) != 0) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } fd = socket(AF_UNIX, SOCK_STREAM, 0); assert(fd != -1); memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; memcpy(addr.sun_path, "\0sock2", 7); ret = bind(fd, (struct sockaddr *)&addr, addrlen); assert(ret != -1); ret = listen(fd, 128); assert(ret != -1); sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return T_EXIT_FAIL; } io_uring_prep_accept(sqe, fd, (struct sockaddr*)&addr, &addrlen, 0); sqe->user_data = 1; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "Got submit %d, expected 1\n", ret); return T_EXIT_FAIL; } ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts); if (!ret) { if (cqe->res == -EBADF || cqe->res == -EINVAL) { fprintf(stdout, "Accept not supported, skipping\n"); goto skip; } else if (cqe->res < 0) { fprintf(stderr, "cqe error %d\n", cqe->res); goto err; } } else if (ret != -ETIME) { fprintf(stderr, "accept() failed to use addr & addrlen parameters!\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); return T_EXIT_PASS; skip: io_uring_queue_exit(&ring); return T_EXIT_SKIP; err: io_uring_queue_exit(&ring); return T_EXIT_FAIL; } liburing-2.6/test/accept.c000066400000000000000000000477041461424365000155770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Check that IORING_OP_ACCEPT works, and send some data across to verify we * didn't get a junk fd. 
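 *
 * For orientation, the flow these tests build on is sketched below
 * (a minimal, hypothetical usage assuming 'ring' is initialized and
 * 'listen_fd' is already listening; the helpers in this file add
 * fixed-file, multishot and overflow variants on top of it):
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_accept(sqe, listen_fd, NULL, NULL, 0);
 *	io_uring_submit(&ring);
 *	io_uring_wait_cqe(&ring, &cqe);
 *	// cqe->res is the accepted fd on success, -errno on failure
 *	io_uring_cqe_seen(&ring, cqe);
 *
 * The send/recv step then pushes 128 bytes across the accepted socket
 * to prove the returned fd is genuine.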
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define MAX_FDS 32 #define NOP_USER_DATA (1LLU << 50) #define INITIAL_USER_DATA 1000 static int no_accept; static int no_accept_multi; struct data { char buf[128]; struct iovec iov; }; struct accept_test_args { int accept_should_error; bool fixed; bool nonblock; bool queue_accept_before_connect; bool multishot; int extra_loops; bool overflow; }; static void close_fds(int fds[], int nr) { int i; for (i = 0; i < nr; i++) close(fds[i]); } static void close_sock_fds(int s_fd[], int c_fd[], int nr, bool fixed) { if (!fixed) close_fds(s_fd, nr); close_fds(c_fd, nr); } static void queue_send(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; struct data *d; d = t_malloc(sizeof(*d)); d->iov.iov_base = d->buf; d->iov.iov_len = sizeof(d->buf); sqe = io_uring_get_sqe(ring); io_uring_prep_writev(sqe, fd, &d->iov, 1, 0); sqe->user_data = 1; } static void queue_recv(struct io_uring *ring, int fd, bool fixed) { struct io_uring_sqe *sqe; struct data *d; d = t_malloc(sizeof(*d)); d->iov.iov_base = d->buf; d->iov.iov_len = sizeof(d->buf); sqe = io_uring_get_sqe(ring); io_uring_prep_readv(sqe, fd, &d->iov, 1, 0); sqe->user_data = 2; if (fixed) sqe->flags |= IOSQE_FIXED_FILE; } static void queue_accept_multishot(struct io_uring *ring, int fd, int idx, bool fixed) { struct io_uring_sqe *sqe = io_uring_get_sqe(ring); int ret; if (fixed) io_uring_prep_multishot_accept_direct(sqe, fd, NULL, NULL, 0); else io_uring_prep_multishot_accept(sqe, fd, NULL, NULL, 0); io_uring_sqe_set_data64(sqe, idx); ret = io_uring_submit(ring); assert(ret != -1); } static void queue_accept_conn(struct io_uring *ring, int fd, struct accept_test_args args) { struct io_uring_sqe *sqe; int ret; int fixed_idx = args.fixed ? 
0 : -1; int count = 1 + args.extra_loops; if (args.multishot) { queue_accept_multishot(ring, fd, INITIAL_USER_DATA, args.fixed); return; } while (count--) { sqe = io_uring_get_sqe(ring); if (fixed_idx < 0) { io_uring_prep_accept(sqe, fd, NULL, NULL, 0); } else { io_uring_prep_accept_direct(sqe, fd, NULL, NULL, 0, fixed_idx); } ret = io_uring_submit(ring); assert(ret != -1); } } static int accept_conn(struct io_uring *ring, int fixed_idx, int *multishot, int fd) { struct io_uring_cqe *pcqe; struct io_uring_cqe cqe; int ret; do { ret = io_uring_wait_cqe(ring, &pcqe); assert(!ret); cqe = *pcqe; io_uring_cqe_seen(ring, pcqe); } while (cqe.user_data == NOP_USER_DATA); if (*multishot) { if (!(cqe.flags & IORING_CQE_F_MORE)) { (*multishot)++; queue_accept_multishot(ring, fd, *multishot, fixed_idx == 0); } else { if (cqe.user_data != *multishot) { fprintf(stderr, "received multishot after told done!\n"); return -ECANCELED; } } } ret = cqe.res; if (fixed_idx >= 0) { if (ret > 0) { if (!multishot) { close(ret); return -EINVAL; } } else if (!ret) { ret = fixed_idx; } } return ret; } static int start_accept_listen(struct sockaddr_in *addr, int port_off, int extra_flags) { int fd, ret; fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC | extra_flags, IPPROTO_TCP); int32_t val = 1; ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); assert(ret != -1); ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); assert(ret != -1); struct sockaddr_in laddr; if (!addr) addr = &laddr; addr->sin_family = AF_INET; addr->sin_addr.s_addr = inet_addr("127.0.0.1"); ret = t_bind_ephemeral_port(fd, addr); assert(!ret); ret = listen(fd, 128); assert(ret != -1); return fd; } static int set_client_fd(struct sockaddr_in *addr) { int32_t val; int fd, ret; fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); val = 1; ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); assert(ret != -1); int32_t flags = fcntl(fd, F_GETFL, 0); assert(flags != -1); flags |= O_NONBLOCK; ret = fcntl(fd, F_SETFL, flags); assert(ret != -1); ret = connect(fd, (struct sockaddr *)addr, sizeof(*addr)); assert(ret == -1); flags = fcntl(fd, F_GETFL, 0); assert(flags != -1); flags &= ~O_NONBLOCK; ret = fcntl(fd, F_SETFL, flags); assert(ret != -1); return fd; } static void cause_overflow(struct io_uring *ring) { int i, ret; for (i = 0; i < ring->cq.ring_entries; i++) { struct io_uring_sqe *sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); io_uring_sqe_set_data64(sqe, NOP_USER_DATA); ret = io_uring_submit(ring); assert(ret != -1); } } static void clear_overflow(struct io_uring *ring) { struct io_uring_cqe *cqe; while (!io_uring_peek_cqe(ring, &cqe)) { if (cqe->user_data != NOP_USER_DATA) break; io_uring_cqe_seen(ring, cqe); } } static int test_loop(struct io_uring *ring, struct accept_test_args args, int recv_s0, struct sockaddr_in *addr) { struct io_uring_cqe *cqe; uint32_t head, count = 0; int i, ret, s_fd[MAX_FDS], c_fd[MAX_FDS], done = 0; bool fixed = args.fixed; bool multishot = args.multishot; uint32_t multishot_mask = 0; int nr_fds = multishot ? MAX_FDS : 1; int multishot_idx = multishot ? INITIAL_USER_DATA : 0; int err_ret = T_EXIT_FAIL; if (args.overflow) cause_overflow(ring); for (i = 0; i < nr_fds; i++) { c_fd[i] = set_client_fd(addr); if (args.overflow && i == nr_fds / 2) clear_overflow(ring); } if (!args.queue_accept_before_connect) queue_accept_conn(ring, recv_s0, args); for (i = 0; i < nr_fds; i++) { s_fd[i] = accept_conn(ring, fixed ? 
0 : -1, &multishot_idx, recv_s0); if (s_fd[i] == -EINVAL) { if (args.accept_should_error) goto out; fprintf(stdout, "%s %s Accept not supported, skipping\n", fixed ? "Fixed" : "", multishot ? "Multishot" : ""); if (multishot) no_accept_multi = 1; else no_accept = 1; ret = T_EXIT_SKIP; goto out; } else if (s_fd[i] < 0) { if (args.accept_should_error && (s_fd[i] == -EBADF || s_fd[i] == -EINVAL)) goto out; fprintf(stderr, "%s %s Accept[%d] got %d\n", fixed ? "Fixed" : "", multishot ? "Multishot" : "", i, s_fd[i]); goto err; } else if (s_fd[i] == 195 && args.overflow) { fprintf(stderr, "Broken overflow handling\n"); goto err; } if (multishot && fixed) { if (s_fd[i] >= MAX_FDS) { fprintf(stderr, "Fixed Multishot Accept[%d] got outbound index: %d\n", i, s_fd[i]); goto err; } /* * for fixed multishot accept test, the file slots * allocated are [0, 32), this means we finally end up * with each bit of a u32 being 1. */ multishot_mask |= (1U << s_fd[i]); } } if (multishot) { if (fixed && (~multishot_mask != 0U)) { fprintf(stderr, "Fixed Multishot Accept misses events\n"); goto err; } goto out; } queue_send(ring, c_fd[0]); queue_recv(ring, s_fd[0], fixed); ret = io_uring_submit_and_wait(ring, 2); assert(ret != -1); while (count < 2) { io_uring_for_each_cqe(ring, head, cqe) { if (cqe->res < 0) { fprintf(stderr, "Got cqe res %d, user_data %i\n", cqe->res, (int)cqe->user_data); done = 1; break; } assert(cqe->res == 128); count++; } assert(count <= 2); io_uring_cq_advance(ring, count); if (done) goto err; } out: close_sock_fds(s_fd, c_fd, nr_fds, fixed); return T_EXIT_PASS; err: close_sock_fds(s_fd, c_fd, nr_fds, fixed); return err_ret; } static int test(struct io_uring *ring, struct accept_test_args args) { struct sockaddr_in addr; int ret = 0; int loop; int32_t recv_s0 = start_accept_listen(&addr, 0, args.nonblock ? SOCK_NONBLOCK : 0); if (args.queue_accept_before_connect) queue_accept_conn(ring, recv_s0, args); for (loop = 0; loop < 1 + args.extra_loops; loop++) { ret = test_loop(ring, args, recv_s0, &addr); if (ret) break; } close(recv_s0); return ret; } static void sig_alrm(int sig) { exit(0); } static int test_accept_pending_on_exit(void) { struct io_uring m_io_uring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fd, ret; ret = io_uring_queue_init(32, &m_io_uring, 0); assert(ret >= 0); fd = start_accept_listen(NULL, 0, 0); sqe = io_uring_get_sqe(&m_io_uring); io_uring_prep_accept(sqe, fd, NULL, NULL, 0); ret = io_uring_submit(&m_io_uring); assert(ret != -1); signal(SIGALRM, sig_alrm); alarm(1); ret = io_uring_wait_cqe(&m_io_uring, &cqe); assert(!ret); io_uring_cqe_seen(&m_io_uring, cqe); io_uring_queue_exit(&m_io_uring); return 0; } struct test_accept_many_args { unsigned int usecs; bool nonblock; bool single_sock; bool close_fds; }; /* * Test issue many accepts and see if we handle cancellation on exit */ static int test_accept_many(struct test_accept_many_args args) { struct io_uring m_io_uring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; unsigned long cur_lim; struct rlimit rlim; int *fds, i, ret; unsigned int nr = 128; int nr_socks = args.single_sock ? 1 : nr; if (getrlimit(RLIMIT_NPROC, &rlim) < 0) { perror("getrlimit"); return 1; } cur_lim = rlim.rlim_cur; rlim.rlim_cur = nr / 4; if (setrlimit(RLIMIT_NPROC, &rlim) < 0) { perror("setrlimit"); return 1; } ret = io_uring_queue_init(2 * nr, &m_io_uring, 0); assert(ret >= 0); fds = t_calloc(nr_socks, sizeof(int)); for (i = 0; i < nr_socks; i++) fds[i] = start_accept_listen(NULL, i, args.nonblock ? 
SOCK_NONBLOCK : 0); for (i = 0; i < nr; i++) { int sock_idx = args.single_sock ? 0 : i; sqe = io_uring_get_sqe(&m_io_uring); io_uring_prep_accept(sqe, fds[sock_idx], NULL, NULL, 0); sqe->user_data = 1 + i; ret = io_uring_submit(&m_io_uring); assert(ret == 1); } if (args.usecs) usleep(args.usecs); if (args.close_fds) for (i = 0; i < nr_socks; i++) close(fds[i]); for (i = 0; i < nr; i++) { if (io_uring_peek_cqe(&m_io_uring, &cqe)) break; if (cqe->res != -ECANCELED) { fprintf(stderr, "Expected cqe to be cancelled %d\n", cqe->res); ret = 1; goto out; } io_uring_cqe_seen(&m_io_uring, cqe); } ret = 0; out: rlim.rlim_cur = cur_lim; if (setrlimit(RLIMIT_NPROC, &rlim) < 0) { perror("setrlimit"); return 1; } free(fds); io_uring_queue_exit(&m_io_uring); return ret; } static int test_accept_cancel(unsigned usecs, unsigned int nr, bool multishot) { struct io_uring m_io_uring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fd, i, ret; if (multishot && no_accept_multi) return T_EXIT_SKIP; ret = io_uring_queue_init(32, &m_io_uring, 0); assert(ret >= 0); fd = start_accept_listen(NULL, 0, 0); for (i = 1; i <= nr; i++) { sqe = io_uring_get_sqe(&m_io_uring); if (!multishot) io_uring_prep_accept(sqe, fd, NULL, NULL, 0); else io_uring_prep_multishot_accept(sqe, fd, NULL, NULL, 0); sqe->user_data = i; ret = io_uring_submit(&m_io_uring); assert(ret == 1); } if (usecs) usleep(usecs); for (i = 1; i <= nr; i++) { sqe = io_uring_get_sqe(&m_io_uring); io_uring_prep_cancel64(sqe, i, 0); sqe->user_data = nr + i; ret = io_uring_submit(&m_io_uring); assert(ret == 1); } for (i = 0; i < nr * 2; i++) { ret = io_uring_wait_cqe(&m_io_uring, &cqe); assert(!ret); /* * Two cases here: * * 1) We cancel the accept4() before it got started, we should * get '0' for the cancel request and '-ECANCELED' for the * accept request. * 2) We cancel the accept4() after it's already running, we * should get '-EALREADY' for the cancel request and * '-EINTR' for the accept request. 
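 *
 * A successful accept is not a possible outcome in this loop: no
 * client ever connects, so the only race is whether the cancel finds
 * the accept before or after it has been armed. That is why the
 * checks below tolerate either pairing of results.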
*/ if (cqe->user_data == 0) { fprintf(stderr, "unexpected 0 user data\n"); goto err; } else if (cqe->user_data <= nr) { /* no multishot */ if (cqe->res == -EINVAL) return T_EXIT_SKIP; if (cqe->res != -EINTR && cqe->res != -ECANCELED) { fprintf(stderr, "Cancelled accept got %d\n", cqe->res); goto err; } } else if (cqe->user_data <= nr * 2) { if (cqe->res != -EALREADY && cqe->res != 0) { fprintf(stderr, "Cancel got %d\n", cqe->res); goto err; } } io_uring_cqe_seen(&m_io_uring, cqe); } io_uring_queue_exit(&m_io_uring); close(fd); return 0; err: io_uring_queue_exit(&m_io_uring); close(fd); return 1; } static int test_accept(int count, bool before) { struct io_uring m_io_uring; int ret; struct accept_test_args args = { .queue_accept_before_connect = before, .extra_loops = count - 1 }; ret = io_uring_queue_init(32, &m_io_uring, 0); assert(ret >= 0); ret = test(&m_io_uring, args); io_uring_queue_exit(&m_io_uring); return ret; } static int test_multishot_accept(int count, bool before, bool overflow) { struct io_uring m_io_uring; int ret; struct accept_test_args args = { .queue_accept_before_connect = before, .multishot = true, .extra_loops = count - 1, .overflow = overflow }; if (no_accept_multi) return T_EXIT_SKIP; ret = io_uring_queue_init(MAX_FDS + 10, &m_io_uring, 0); assert(ret >= 0); ret = test(&m_io_uring, args); io_uring_queue_exit(&m_io_uring); return ret; } static int test_accept_multishot_wrong_arg(void) { struct io_uring m_io_uring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fd, ret; ret = io_uring_queue_init(4, &m_io_uring, 0); assert(ret >= 0); fd = start_accept_listen(NULL, 0, 0); sqe = io_uring_get_sqe(&m_io_uring); io_uring_prep_multishot_accept_direct(sqe, fd, NULL, NULL, 0); sqe->file_index = 1; ret = io_uring_submit(&m_io_uring); assert(ret == 1); ret = io_uring_wait_cqe(&m_io_uring, &cqe); assert(!ret); if (cqe->res != -EINVAL) { fprintf(stderr, "file index should be IORING_FILE_INDEX_ALLOC \ if its accept in multishot direct mode\n"); goto err; } io_uring_cqe_seen(&m_io_uring, cqe); io_uring_queue_exit(&m_io_uring); close(fd); return 0; err: io_uring_queue_exit(&m_io_uring); close(fd); return 1; } static int test_accept_nonblock(bool queue_before_connect, int count) { struct io_uring m_io_uring; int ret; struct accept_test_args args = { .nonblock = true, .queue_accept_before_connect = queue_before_connect, .extra_loops = count - 1 }; ret = io_uring_queue_init(32, &m_io_uring, 0); assert(ret >= 0); ret = test(&m_io_uring, args); io_uring_queue_exit(&m_io_uring); return ret; } static int test_accept_fixed(void) { struct io_uring m_io_uring; int ret, fd = -1; struct accept_test_args args = { .fixed = true }; ret = io_uring_queue_init(32, &m_io_uring, 0); assert(ret >= 0); ret = io_uring_register_files(&m_io_uring, &fd, 1); if (ret) { /* kernel doesn't support sparse registered files, skip */ if (ret == -EBADF || ret == -EINVAL) return T_EXIT_SKIP; return T_EXIT_FAIL; } ret = test(&m_io_uring, args); io_uring_queue_exit(&m_io_uring); return ret; } static int test_multishot_fixed_accept(void) { struct io_uring m_io_uring; int ret, fd[MAX_FDS]; struct accept_test_args args = { .fixed = true, .multishot = true }; if (no_accept_multi) return T_EXIT_SKIP; memset(fd, -1, sizeof(fd)); ret = io_uring_queue_init(MAX_FDS + 10, &m_io_uring, 0); assert(ret >= 0); ret = io_uring_register_files(&m_io_uring, fd, MAX_FDS); if (ret) { /* kernel doesn't support sparse registered files, skip */ if (ret == -EBADF || ret == -EINVAL) return T_EXIT_SKIP; return T_EXIT_FAIL; } ret = 
test(&m_io_uring, args); io_uring_queue_exit(&m_io_uring); return ret; } static int test_accept_sqpoll(void) { struct io_uring m_io_uring; struct io_uring_params p = { }; int ret; struct accept_test_args args = { }; p.flags = IORING_SETUP_SQPOLL; ret = t_create_ring_params(32, &m_io_uring, &p); if (ret == T_SETUP_SKIP) return 0; else if (ret < 0) return ret; args.accept_should_error = 1; if (p.features & IORING_FEAT_SQPOLL_NONFIXED) args.accept_should_error = 0; ret = test(&m_io_uring, args); io_uring_queue_exit(&m_io_uring); return ret; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test_accept(1, false); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept failed\n"); return ret; } if (no_accept) return T_EXIT_SKIP; ret = test_accept(2, false); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept(2) failed\n"); return ret; } ret = test_accept(2, true); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept(2, true) failed\n"); return ret; } ret = test_accept_nonblock(false, 1); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_nonblock failed\n"); return ret; } ret = test_accept_nonblock(true, 1); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_nonblock(before, 1) failed\n"); return ret; } ret = test_accept_nonblock(true, 3); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_nonblock(before,3) failed\n"); return ret; } ret = test_accept_fixed(); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_fixed failed\n"); return ret; } ret = test_multishot_fixed_accept(); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_multishot_fixed_accept failed\n"); return ret; } ret = test_accept_multishot_wrong_arg(); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_multishot_wrong_arg failed\n"); return ret; } ret = test_accept_sqpoll(); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_sqpoll failed\n"); return ret; } ret = test_accept_cancel(0, 1, false); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_cancel nodelay failed\n"); return ret; } ret = test_accept_cancel(10000, 1, false); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_cancel delay failed\n"); return ret; } ret = test_accept_cancel(0, 4, false); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_cancel nodelay failed\n"); return ret; } ret = test_accept_cancel(10000, 4, false); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_cancel delay failed\n"); return ret; } ret = test_accept_cancel(0, 1, true); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_cancel multishot nodelay failed\n"); return ret; } ret = test_accept_cancel(10000, 1, true); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_cancel multishot delay failed\n"); return ret; } ret = test_accept_cancel(0, 4, true); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_cancel multishot nodelay failed\n"); return ret; } ret = test_accept_cancel(10000, 4, true); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_cancel multishot delay failed\n"); return ret; } ret = test_multishot_accept(1, true, true); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_multishot_accept(1, false, true) failed\n"); return ret; } ret = test_multishot_accept(1, false, false); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_multishot_accept(1, false, false) failed\n"); return ret; } ret = test_multishot_accept(1, true, false); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_multishot_accept(1, true, false) failed\n"); return ret; } ret = test_accept_many((struct test_accept_many_args) {}); if 
(ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_many failed\n"); return ret; } ret = test_accept_many((struct test_accept_many_args) { .usecs = 100000 }); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_many(sleep) failed\n"); return ret; } ret = test_accept_many((struct test_accept_many_args) { .nonblock = true }); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_many(nonblock) failed\n"); return ret; } ret = test_accept_many((struct test_accept_many_args) { .nonblock = true, .single_sock = true, .close_fds = true }); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_many(nonblock,close) failed\n"); return ret; } ret = test_accept_pending_on_exit(); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_accept_pending_on_exit failed\n"); return ret; } return T_EXIT_PASS; } liburing-2.6/test/across-fork.c000066400000000000000000000141671461424365000165660ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test sharing a ring across a fork */ #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" struct forktestmem { struct io_uring ring; pthread_barrier_t barrier; pthread_barrierattr_t barrierattr; }; static int open_tempfile(const char *dir, const char *fname) { int fd; char buf[32]; snprintf(buf, sizeof(buf), "%s/%s", dir, fname); fd = open(buf, O_RDWR | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR); if (fd < 0) { perror("open"); exit(1); } return fd; } static int submit_write(struct io_uring *ring, int fd, const char *str, int wait) { struct io_uring_sqe *sqe; struct iovec iovec; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "could not get sqe\n"); return 1; } iovec.iov_base = (char *) str; iovec.iov_len = strlen(str); io_uring_prep_writev(sqe, fd, &iovec, 1, 0); ret = io_uring_submit_and_wait(ring, wait); if (ret < 0) { fprintf(stderr, "submit failed: %s\n", strerror(-ret)); return 1; } return 0; } static int wait_cqe(struct io_uring *ring, const char *stage) { struct io_uring_cqe *cqe; int ret; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "%s wait_cqe failed %d\n", stage, ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "%s cqe failed %d\n", stage, cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); return 0; } static int verify_file(const char *tmpdir, const char *fname, const char* expect) { int fd; char buf[512]; int err = 0; memset(buf, 0, sizeof(buf)); fd = open_tempfile(tmpdir, fname); if (fd < 0) return 1; if (read(fd, buf, sizeof(buf) - 1) < 0) return 1; if (strcmp(buf, expect) != 0) { fprintf(stderr, "content mismatch for %s\n" "got:\n%s\n" "expected:\n%s\n", fname, buf, expect); err = 1; } close(fd); return err; } static void cleanup(const char *tmpdir) { char buf[32]; /* don't check errors, called during partial runs */ snprintf(buf, sizeof(buf), "%s/%s", tmpdir, "shared"); unlink(buf); snprintf(buf, sizeof(buf), "%s/%s", tmpdir, "parent1"); unlink(buf); snprintf(buf, sizeof(buf), "%s/%s", tmpdir, "parent2"); unlink(buf); snprintf(buf, sizeof(buf), "%s/%s", tmpdir, "child"); unlink(buf); rmdir(tmpdir); } int main(int argc, char *argv[]) { struct forktestmem *shmem; char tmpdir[] = "forktmpXXXXXX"; int shared_fd; int ret; pid_t p; if (argc > 1) return T_EXIT_SKIP; shmem = mmap(0, sizeof(struct forktestmem), PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0); if (!shmem) { fprintf(stderr, "mmap failed\n"); exit(T_EXIT_FAIL); } pthread_barrierattr_init(&shmem->barrierattr); 
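
	/*
	 * The barrier lives in MAP_SHARED anonymous memory and must also be
	 * marked process-shared, otherwise parent and child would each spin
	 * on a private copy after fork(). Spelled out with the named constant
	 * (a sketch; the call below passes the literal 1, which equals
	 * PTHREAD_PROCESS_SHARED on Linux):
	 *
	 *	pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
	 *	pthread_barrier_init(&barrier, &attr, 2);
	 */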
pthread_barrierattr_setpshared(&shmem->barrierattr, 1); pthread_barrier_init(&shmem->barrier, &shmem->barrierattr, 2); ret = io_uring_queue_init(10, &shmem->ring, 0); if (ret < 0) { fprintf(stderr, "queue init failed\n"); exit(T_EXIT_FAIL); } if (mkdtemp(tmpdir) == NULL) { fprintf(stderr, "temp directory creation failed\n"); exit(T_EXIT_FAIL); } shared_fd = open_tempfile(tmpdir, "shared"); /* * First do a write before the fork, to test whether child can * reap that */ if (submit_write(&shmem->ring, shared_fd, "before fork: write shared fd\n", 0)) goto errcleanup; p = fork(); switch (p) { case -1: fprintf(stderr, "fork failed\n"); goto errcleanup; default: { /* parent */ int parent_fd1; int parent_fd2; int wstatus; /* wait till fork is started up */ pthread_barrier_wait(&shmem->barrier); parent_fd1 = open_tempfile(tmpdir, "parent1"); parent_fd2 = open_tempfile(tmpdir, "parent2"); /* do a parent write to the shared fd */ if (submit_write(&shmem->ring, shared_fd, "parent: write shared fd\n", 0)) goto errcleanup; /* do a parent write to an fd where same numbered fd exists in child */ if (submit_write(&shmem->ring, parent_fd1, "parent: write parent fd 1\n", 0)) goto errcleanup; /* do a parent write to an fd where no same numbered fd exists in child */ if (submit_write(&shmem->ring, parent_fd2, "parent: write parent fd 2\n", 0)) goto errcleanup; /* wait to switch read/writ roles with child */ pthread_barrier_wait(&shmem->barrier); /* now wait for child to exit, to ensure we still can read completion */ waitpid(p, &wstatus, 0); if (WEXITSTATUS(wstatus) != 0) { fprintf(stderr, "child failed\n"); goto errcleanup; } if (wait_cqe(&shmem->ring, "p cqe 1")) goto errcleanup; if (wait_cqe(&shmem->ring, "p cqe 2")) goto errcleanup; /* check that IO can still be submitted after child exited */ if (submit_write(&shmem->ring, shared_fd, "parent: write shared fd after child exit\n", 0)) goto errcleanup; if (wait_cqe(&shmem->ring, "p cqe 3")) goto errcleanup; break; } case 0: { /* child */ int child_fd; /* wait till fork is started up */ pthread_barrier_wait(&shmem->barrier); child_fd = open_tempfile(tmpdir, "child"); if (wait_cqe(&shmem->ring, "c cqe shared")) exit(1); if (wait_cqe(&shmem->ring, "c cqe parent 1")) exit(1); if (wait_cqe(&shmem->ring, "c cqe parent 2")) exit(1); if (wait_cqe(&shmem->ring, "c cqe parent 3")) exit(1); /* wait to switch read/writ roles with parent */ pthread_barrier_wait(&shmem->barrier); if (submit_write(&shmem->ring, child_fd, "child: write child fd\n", 0)) exit(1); /* ensure both writes have finished before child exits */ if (submit_write(&shmem->ring, shared_fd, "child: write shared fd\n", 2)) exit(1); exit(0); } } if (verify_file(tmpdir, "shared", "before fork: write shared fd\n" "parent: write shared fd\n" "child: write shared fd\n" "parent: write shared fd after child exit\n") || verify_file(tmpdir, "parent1", "parent: write parent fd 1\n") || verify_file(tmpdir, "parent2", "parent: write parent fd 2\n") || verify_file(tmpdir, "child", "child: write child fd\n")) goto errcleanup; cleanup(tmpdir); exit(T_EXIT_PASS); errcleanup: cleanup(tmpdir); exit(T_EXIT_FAIL); } liburing-2.6/test/b19062a56726.c000066400000000000000000000026241461424365000157260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ // autogenerated by syzkaller (https://github.com/google/syzkaller) #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" int main(int argc, char *argv[]) { if (argc > 1) return 
T_EXIT_SKIP; mmap((void *) 0x20000000, 0x1000000, 3, MAP_ANON|MAP_PRIVATE, -1, 0); *(uint32_t*)0x20000200 = 0; *(uint32_t*)0x20000204 = 0; *(uint32_t*)0x20000208 = 5; *(uint32_t*)0x2000020c = 0x400; *(uint32_t*)0x20000210 = 0; *(uint32_t*)0x20000214 = 0; *(uint32_t*)0x20000218 = 0; *(uint32_t*)0x2000021c = 0; *(uint32_t*)0x20000220 = 0; *(uint32_t*)0x20000224 = 0; *(uint32_t*)0x20000228 = 0; *(uint32_t*)0x2000022c = 0; *(uint32_t*)0x20000230 = 0; *(uint32_t*)0x20000234 = 0; *(uint32_t*)0x20000238 = 0; *(uint32_t*)0x2000023c = 0; *(uint32_t*)0x20000240 = 0; *(uint32_t*)0x20000244 = 0; *(uint64_t*)0x20000248 = 0; *(uint32_t*)0x20000250 = 0; *(uint32_t*)0x20000254 = 0; *(uint32_t*)0x20000258 = 0; *(uint32_t*)0x2000025c = 0; *(uint32_t*)0x20000260 = 0; *(uint32_t*)0x20000264 = 0; *(uint32_t*)0x20000268 = 0; *(uint32_t*)0x2000026c = 0; *(uint64_t*)0x20000270 = 0; __sys_io_uring_setup(0xc9f, (struct io_uring_params *) 0x20000200); return T_EXIT_PASS; } liburing-2.6/test/b5837bd5311d.c000066400000000000000000000033411461424365000160610ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Check to see if wait_nr is being honored. */ #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret; struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 10000000 }; if (argc > 1) return T_EXIT_SKIP; if (io_uring_queue_init(4, &ring, 0) != 0) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } /* * First, submit the timeout sqe so we can actually finish the test * if everything is in working order. */ sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return T_EXIT_FAIL; } io_uring_prep_timeout(sqe, &ts, (unsigned)-1, 0); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "Got submit %d, expected 1\n", ret); return T_EXIT_FAIL; } /* * Next, submit a nop and wait for two events. If everything is working * as it should, we should be waiting for more than a millisecond and we * should see two cqes. Otherwise, execution continues immediately * and we see only one cqe. 
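* * (The second CQE comes from the timeout armed above: its tv_nsec of 10000000 * is 10ms, so a kernel that honors wait_nr blocks here for roughly that long * before both completions become visible.)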
*/ sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return T_EXIT_FAIL; } io_uring_prep_nop(sqe); ret = io_uring_submit_and_wait(&ring, 2); if (ret != 1) { fprintf(stderr, "Got submit %d, expected 1\n", ret); return T_EXIT_FAIL; } if (io_uring_peek_cqe(&ring, &cqe) != 0) { fprintf(stderr, "Unable to peek cqe!\n"); return T_EXIT_FAIL; } io_uring_cqe_seen(&ring, cqe); if (io_uring_peek_cqe(&ring, &cqe) != 0) { fprintf(stderr, "Unable to peek cqe!\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); return T_EXIT_PASS; } liburing-2.6/test/buf-ring-nommap.c000066400000000000000000000052131461424365000173230ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test IOU_PBUF_RING_MMAP with a ring setup without mmap'ing * sq/cq arrays * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int bgid = 5; static int bid = 89; int main(int argc, char *argv[]) { struct io_uring_buf_ring *br; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; size_t ring_size; int ret, ring_mask, fds[2]; struct io_uring_buf_reg reg = { .ring_entries = 1, .bgid = bgid, .flags = IOU_PBUF_RING_MMAP, }; struct io_uring_params p = { }; void *ring_mem; char buf[32]; off_t off; if (argc > 1) return T_EXIT_SKIP; if (posix_memalign(&ring_mem, 16384, 16384)) return T_EXIT_FAIL; p.flags = IORING_SETUP_NO_MMAP; ret = io_uring_queue_init_mem(1, &ring, &p, ring_mem, 16384); if (ret < 0) { if (ret == -EINVAL) return T_EXIT_SKIP; fprintf(stderr, "queue init failed %d\n", ret); return T_EXIT_FAIL; } if (pipe(fds) < 0) { perror("pipe"); return T_EXIT_FAIL; } ring_size = sizeof(struct io_uring_buf); ring_mask = io_uring_buf_ring_mask(1); ret = io_uring_register_buf_ring(&ring, &reg, 0); if (ret) { if (ret == -EINVAL) return T_EXIT_SKIP; fprintf(stderr, "reg buf ring: %d\n", ret); return T_EXIT_FAIL; } off = IORING_OFF_PBUF_RING | (unsigned long long) bgid << IORING_OFF_PBUF_SHIFT; br = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, ring.ring_fd, off); if (br == MAP_FAILED) { if (errno == ENOMEM) return T_EXIT_SKIP; perror("mmap"); return T_EXIT_FAIL; } io_uring_buf_ring_add(br, buf, sizeof(buf), bid, ring_mask, 0); io_uring_buf_ring_advance(br, 1); sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fds[0], NULL, 0, 0); sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = bgid; io_uring_submit(&ring); ret = write(fds[1], "Hello", 5); if (ret < 0) { perror("write"); return T_EXIT_FAIL; } else if (ret != 5) { fprintf(stderr, "short write %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait %d\n", ret); return T_EXIT_FAIL; } if (cqe->res < 0) { fprintf(stderr, "cqe res %d\n", cqe->res); return T_EXIT_FAIL; } if (!(cqe->flags & IORING_CQE_F_BUFFER)) { fprintf(stderr, "buffer not selected in cqe\n"); return T_EXIT_FAIL; } if ((cqe->flags >> IORING_CQE_BUFFER_SHIFT) != bid) { fprintf(stderr, "wrong buffer id returned\n"); return T_EXIT_FAIL; } io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return T_EXIT_PASS; } liburing-2.6/test/buf-ring-put.c000066400000000000000000000034151461424365000166460ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test persistence of mmap'ed provided ring buffers. Use a range * of buffer group IDs that puts us into both the lower end array * and higher end xarray.
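* * (At the time this test was written, the kernel kept low-numbered buffer * group IDs in a flat array and higher ones in an xarray, with the cutoff at * 64 groups; BGID_START/BGID_NR below yield groups 60..69, straddling that * boundary. Each group is then mapped with a per-group offset, roughly: * off = IORING_OFF_PBUF_RING | (__u64)bgid << IORING_OFF_PBUF_SHIFT; * br = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ring_fd, off); * exactly as main() does below.)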
* */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define BGID_START 60 #define BGID_NR 10 #define ENTRIES 512 int main(int argc, char *argv[]) { struct io_uring_buf_ring *br[BGID_NR]; struct io_uring ring; size_t ring_size; int ret, i, j; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "queue init failed %d\n", ret); return T_EXIT_FAIL; } ring_size = ENTRIES * sizeof(struct io_uring_buf); for (i = 0; i < BGID_NR; i++) { int bgid = BGID_START + i; struct io_uring_buf_reg reg = { .ring_entries = ENTRIES, .bgid = bgid, .flags = IOU_PBUF_RING_MMAP, }; off_t off; ret = io_uring_register_buf_ring(&ring, &reg, 0); if (ret) { if (ret == -EINVAL) return T_EXIT_SKIP; fprintf(stderr, "reg buf ring: %d\n", ret); return T_EXIT_FAIL; } off = IORING_OFF_PBUF_RING | (unsigned long long) bgid << IORING_OFF_PBUF_SHIFT; br[i] = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, ring.ring_fd, off); if (br[i] == MAP_FAILED) { perror("mmap"); return T_EXIT_FAIL; } } for (i = 0; i < BGID_NR; i++) { ret = io_uring_unregister_buf_ring(&ring, BGID_START + i); if (ret) { fprintf(stderr, "reg buf ring: %d\n", ret); return T_EXIT_FAIL; } } for (j = 0; j < 1000; j++) { for (i = 0; i < BGID_NR; i++) memset(br[i], 0x5a, ring_size); usleep(1000); } io_uring_queue_exit(&ring); return T_EXIT_PASS; } liburing-2.6/test/buf-ring.c000066400000000000000000000240751461424365000160450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various shared buffer ring sanity checks * */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_buf_ring; static int pagesize; /* test trying to register classic group when ring group exists */ static int test_mixed_reg2(int bgid) { struct io_uring_buf_ring *br; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; void *bufs; int ret; ret = t_create_ring(1, &ring, 0); if (ret == T_SETUP_SKIP) return 0; else if (ret != T_SETUP_OK) return 1; br = io_uring_setup_buf_ring(&ring, 32, bgid, 0, &ret); if (!br) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } /* provide classic buffers, group 1 */ bufs = malloc(8 * 1024); sqe = io_uring_get_sqe(&ring); io_uring_prep_provide_buffers(sqe, bufs, 1024, 8, bgid, 0); io_uring_submit(&ring); ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe %d\n", ret); return 1; } if (cqe->res != -EEXIST && cqe->res != -EINVAL) { fprintf(stderr, "cqe res %d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); io_uring_free_buf_ring(&ring, br, 32, bgid); io_uring_queue_exit(&ring); return 0; } /* test trying to register ring group when classic group exists */ static int test_mixed_reg(int bgid) { struct io_uring_buf_ring *br; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; void *bufs; int ret; ret = t_create_ring(1, &ring, 0); if (ret == T_SETUP_SKIP) return 0; else if (ret != T_SETUP_OK) return 1; /* provide classic buffers, group 1 */ bufs = malloc(8 * 1024); sqe = io_uring_get_sqe(&ring); io_uring_prep_provide_buffers(sqe, bufs, 1024, 8, bgid, 0); io_uring_submit(&ring); ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe %d\n", ret); return 1; } if (cqe->res) { fprintf(stderr, "cqe res %d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); br = io_uring_setup_buf_ring(&ring, 32, bgid, 0, &ret); if (br) { fprintf(stderr,
"Buffer ring setup succeeded unexpectedly %d\n", ret); return 1; } io_uring_queue_exit(&ring); return 0; } static int test_double_reg_unreg(int bgid) { struct io_uring_buf_reg reg = { }; struct io_uring_buf_ring *br; struct io_uring ring; int ret; ret = t_create_ring(1, &ring, 0); if (ret == T_SETUP_SKIP) return 0; else if (ret != T_SETUP_OK) return 1; br = io_uring_setup_buf_ring(&ring, 32, bgid, 0, &ret); if (!br) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } /* check that 2nd register with same bgid fails */ reg.ring_addr = (unsigned long) br; reg.ring_entries = 32; reg.bgid = bgid; ret = io_uring_register_buf_ring(&ring, ®, 0); if (ret != -EEXIST) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } ret = io_uring_free_buf_ring(&ring, br, 32, bgid); if (ret) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } ret = io_uring_unregister_buf_ring(&ring, bgid); if (ret != -EINVAL && ret != -ENOENT) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } io_uring_queue_exit(&ring); return 0; } static int test_reg_unreg(int bgid) { struct io_uring_buf_ring *br; struct io_uring ring; int ret; ret = t_create_ring(1, &ring, 0); if (ret == T_SETUP_SKIP) return 0; else if (ret != T_SETUP_OK) return 1; br = io_uring_setup_buf_ring(&ring, 32, bgid, 0, &ret); if (!br) { if (ret == -EINVAL) { no_buf_ring = 1; return 0; } fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } ret = io_uring_free_buf_ring(&ring, br, 32, bgid); if (ret) { fprintf(stderr, "Buffer ring unregister failed %d\n", ret); return 1; } io_uring_queue_exit(&ring); return 0; } static int test_bad_reg(int bgid) { struct io_uring ring; int ret; struct io_uring_buf_reg reg = { }; ret = t_create_ring(1, &ring, 0); if (ret == T_SETUP_SKIP) return 0; else if (ret != T_SETUP_OK) return 1; reg.ring_addr = 4096; reg.ring_entries = 32; reg.bgid = bgid; ret = io_uring_register_buf_ring(&ring, ®, 0); if (!ret) fprintf(stderr, "Buffer ring register worked unexpectedly\n"); io_uring_queue_exit(&ring); return !ret; } static int test_full_page_reg(int bgid) { #if defined(__hppa__) return T_EXIT_SKIP; #else struct io_uring ring; int ret; void *ptr; struct io_uring_buf_reg reg = { }; int entries = pagesize / sizeof(struct io_uring_buf); ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "queue init failed %d\n", ret); return T_EXIT_FAIL; } ret = posix_memalign(&ptr, pagesize, pagesize * 2); if (ret) { fprintf(stderr, "posix_memalign failed %d\n", ret); goto err; } ret = mprotect(ptr + pagesize, pagesize, PROT_NONE); if (ret) { fprintf(stderr, "mprotect failed %d\n", errno); goto err1; } reg.ring_addr = (unsigned long) ptr; reg.ring_entries = entries; reg.bgid = bgid; ret = io_uring_register_buf_ring(&ring, ®, 0); if (ret) fprintf(stderr, "register buf ring failed %d\n", ret); if (mprotect(ptr + pagesize, pagesize, PROT_READ | PROT_WRITE)) fprintf(stderr, "reverting mprotect failed %d\n", errno); err1: free(ptr); err: io_uring_queue_exit(&ring); return ret ? 
T_EXIT_FAIL : T_EXIT_PASS; #endif } static int test_one_read(int fd, int bgid, struct io_uring *ring) { int ret; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return -1; } io_uring_prep_read(sqe, fd, NULL, 1, 0); sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = bgid; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); return -1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); return -1; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret == -ENOBUFS) return ret; if (ret != 1) { fprintf(stderr, "read result %d\n", ret); return -1; } return cqe->flags >> 16; } static int test_running(int bgid, int entries, int loops, int use_mmap) { int ring_mask = io_uring_buf_ring_mask(entries); struct io_uring_buf_ring *br; int ret, loop, idx, read_fd; struct io_uring ring; char buffer[8]; bool *buffers; ret = t_create_ring(1, &ring, 0); if (ret == T_SETUP_SKIP) return 0; else if (ret != T_SETUP_OK) return 1; if (!use_mmap) { br = io_uring_setup_buf_ring(&ring, entries, bgid, 0, &ret); if (!br) { /* by now should have checked if this is supported or not */ fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } } else { struct io_uring_buf_reg reg = { .ring_entries = entries, .bgid = bgid, .flags = IOU_PBUF_RING_MMAP, }; size_t ring_size; off_t off; ret = io_uring_register_buf_ring(&ring, &reg, 0); if (ret) { fprintf(stderr, "mmap ring register failed %d\n", ret); return 1; } off = IORING_OFF_PBUF_RING | (unsigned long long) bgid << IORING_OFF_PBUF_SHIFT; ring_size = sizeof(struct io_uring_buf) * entries; br = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, ring.ring_fd, off); if (br == MAP_FAILED) { perror("mmap"); return 1; } } buffers = malloc(sizeof(bool) * entries); if (!buffers) return 1; read_fd = open("/dev/zero", O_RDONLY); if (read_fd < 0) return 1; for (loop = 0; loop < loops; loop++) { memset(buffers, 0, sizeof(bool) * entries); for (idx = 0; idx < entries; idx++) io_uring_buf_ring_add(br, buffer, sizeof(buffer), idx, ring_mask, idx); io_uring_buf_ring_advance(br, entries); for (idx = 0; idx < entries; idx++) { memset(buffer, 1, sizeof(buffer)); ret = test_one_read(read_fd, bgid, &ring); if (ret < 0) { fprintf(stderr, "bad run %d/%d = %d\n", loop, idx, ret); return ret; } if (buffers[ret]) { fprintf(stderr, "reused buffer %d/%d = %d!\n", loop, idx, ret); return 1; } if (buffer[0] != 0) { fprintf(stderr, "unexpected read %d %d/%d = %d!\n", (int)buffer[0], loop, idx, ret); return 1; } if (buffer[1] != 1) { fprintf(stderr, "unexpected spilled read %d %d/%d = %d!\n", (int)buffer[1], loop, idx, ret); return 1; } buffers[ret] = true; } ret = test_one_read(read_fd, bgid, &ring); if (ret != -ENOBUFS) { fprintf(stderr, "expected enobufs run %d = %d\n", loop, ret); return 1; } } ret = io_uring_unregister_buf_ring(&ring, bgid); if (ret) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } close(read_fd); io_uring_queue_exit(&ring); free(buffers); return 0; } int main(int argc, char *argv[]) { int bgids[] = { 1, 127, -1 }; int entries[] = {1, 32768, 4096, -1 }; int ret, i; if (argc > 1) return T_EXIT_SKIP; pagesize = getpagesize(); for (i = 0; bgids[i] != -1; i++) { ret = test_reg_unreg(bgids[i]); if (ret) { fprintf(stderr, "test_reg_unreg failed\n"); return T_EXIT_FAIL; } if (no_buf_ring) break; ret = test_bad_reg(bgids[i]); if (ret) { fprintf(stderr, "test_bad_reg
failed\n"); return T_EXIT_FAIL; } ret = test_double_reg_unreg(bgids[i]); if (ret) { fprintf(stderr, "test_double_reg_unreg failed\n"); return T_EXIT_FAIL; } ret = test_mixed_reg(bgids[i]); if (ret) { fprintf(stderr, "test_mixed_reg failed\n"); return T_EXIT_FAIL; } ret = test_mixed_reg2(bgids[i]); if (ret) { fprintf(stderr, "test_mixed_reg2 failed\n"); return T_EXIT_FAIL; } ret = test_full_page_reg(bgids[i]); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_full_page_reg failed\n"); return T_EXIT_FAIL; } } for (i = 0; !no_buf_ring && entries[i] != -1; i++) { ret = test_running(2, entries[i], 3, 0); if (ret) { fprintf(stderr, "test_running(%d) failed\n", entries[i]); return T_EXIT_FAIL; } } for (i = 0; !no_buf_ring && entries[i] != -1; i++) { ret = test_running(2, entries[i], 3, 1); if (ret) { fprintf(stderr, "test_running(%d) mmap failed\n", entries[i]); return T_EXIT_FAIL; } } return T_EXIT_PASS; } liburing-2.6/test/ce593a6c480a.c000066400000000000000000000061131461424365000161440ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test 5.7 regression with task_work not being run while a task is * waiting on another event in the kernel. */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int use_sqpoll = 0; static void notify_fd(int fd) { char buf[8] = {0, 0, 0, 0, 0, 0, 1}; int ret; ret = write(fd, &buf, 8); if (ret < 0) perror("write"); } static void *delay_set_fd_from_thread(void *data) { int fd = (intptr_t) data; sleep(1); notify_fd(fd); return NULL; } int main(int argc, char *argv[]) { struct io_uring_params p = {}; struct io_uring ring; int loop_fd, other_fd; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe = NULL; int ret, use_fd; char buf[8] = {0, 0, 0, 0, 0, 0, 1}; pthread_t tid; if (argc > 1) return T_EXIT_SKIP; /* Create an eventfd to be registered with the loop to be * notified of events being ready */ loop_fd = eventfd(0, EFD_CLOEXEC); if (loop_fd == -1) { fprintf(stderr, "eventfd errno=%d\n", errno); return T_EXIT_FAIL; } /* Create an eventfd that can create events */ use_fd = other_fd = eventfd(0, EFD_CLOEXEC); if (other_fd == -1) { fprintf(stderr, "eventfd errno=%d\n", errno); return T_EXIT_FAIL; } if (use_sqpoll) p.flags = IORING_SETUP_SQPOLL; /* Setup the ring with a registered event fd to be notified on events */ ret = t_create_ring_params(8, &ring, &p); if (ret == T_SETUP_SKIP) return T_EXIT_PASS; else if (ret < 0) return ret; ret = io_uring_register_eventfd(&ring, loop_fd); if (ret < 0) { fprintf(stderr, "register_eventfd=%d\n", ret); return T_EXIT_FAIL; } if (use_sqpoll) { ret = io_uring_register_files(&ring, &other_fd, 1); if (ret < 0) { fprintf(stderr, "register_files=%d\n", ret); return T_EXIT_FAIL; } use_fd = 0; } /* Submit a poll operation to wait on an event in other_fd */ sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, use_fd, POLLIN); sqe->user_data = 1; if (use_sqpoll) sqe->flags |= IOSQE_FIXED_FILE; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit=%d\n", ret); return T_EXIT_FAIL; } /* * CASE 3: Hangs forever in Linux 5.7.5; Works in Linux 5.6.0 When this * code is uncommented, we don't se a notification on other_fd until * _after_ we have started the read on loop_fd. In that case, the read() on * loop_fd seems to hang forever. 
*/ pthread_create(&tid, NULL, delay_set_fd_from_thread, (void*) (intptr_t) other_fd); /* Wait on the event fd for an event to be ready */ do { ret = read(loop_fd, buf, 8); } while (ret < 0 && errno == EINTR); if (ret < 0) { perror("read"); return T_EXIT_FAIL; } else if (ret != 8) { fprintf(stderr, "Odd-sized eventfd read: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return ret; } if (cqe->res < 0) { fprintf(stderr, "cqe->res=%d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(&ring, cqe); return T_EXIT_PASS; } liburing-2.6/test/close-opath.c000066400000000000000000000046631461424365000165530ustar00rootroot00000000000000// SPDX-License-Identifier: MIT #define _GNU_SOURCE 1 #define _FILE_OFFSET_BITS 64 // Test program for io_uring IORING_OP_CLOSE with O_PATH file. // Author: Clayton Harris , 2020-06-07 // linux 5.6.14-300.fc32.x86_64 // gcc 10.1.1-1.fc32 // liburing.x86_64 0.5-1.fc32 // gcc -O2 -Wall -Wextra -std=c11 -o close_opath close_opath.c -luring // ./close_opath testfilepath #include #include #include #include #include #include #include "liburing.h" typedef struct { const char *const flnames; const int oflags; } oflgs_t; static int test_io_uring_close(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "io_uring_get_sqe() failed\n"); return -ENOENT; } io_uring_prep_close(sqe, fd); ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "io_uring_submit() failed, errno %d: %s\n", -ret, strerror(-ret)); return ret; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "io_uring_wait_cqe() failed, errno %d: %s\n", -ret, strerror(-ret)); return ret; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret < 0 && ret != -EOPNOTSUPP && ret != -EINVAL && ret != -EBADF) { fprintf(stderr, "io_uring close() failed, errno %d: %s\n", -ret, strerror(-ret)); return ret; } return 0; } static int open_file(const char *path, const oflgs_t *oflgs) { int fd; fd = openat(AT_FDCWD, path, oflgs->oflags, 0); if (fd < 0) { int err = errno; fprintf(stderr, "openat(%s, %s) failed, errno %d: %s\n", path, oflgs->flnames, err, strerror(err)); return -err; } return fd; } int main(int argc, char *argv[]) { const char *fname = "."; struct io_uring ring; int ret, i; static const oflgs_t oflgs[] = { { "O_RDONLY", O_RDONLY }, { "O_PATH", O_PATH } }; ret = io_uring_queue_init(2, &ring, 0); if (ret < 0) { fprintf(stderr, "io_uring_queue_init() failed, errno %d: %s\n", -ret, strerror(-ret)); return 0x02; } #define OFLGS_SIZE (sizeof(oflgs) / sizeof(oflgs[0])) ret = 0; for (i = 0; i < OFLGS_SIZE; i++) { int fd; fd = open_file(fname, &oflgs[i]); if (fd < 0) { ret |= 0x02; break; } /* Should always succeed */ if (test_io_uring_close(&ring, fd) < 0) ret |= 0x04 << i; } #undef OFLGS_SIZE io_uring_queue_exit(&ring); return ret; } liburing-2.6/test/config000066400000000000000000000005061461424365000153510ustar00rootroot00000000000000# Copy this to config.local, uncomment and define values # # Define tests to exclude from running # TEST_EXCLUDE="" # # Define raw test devices (or files) for test cases, if any # declare -A TEST_MAP=() # # If no TEST_MAP entry exists for a test, use the ones given in TEST_FILES # TEST_FILES="/dev/somedevice /data/somefile" liburing-2.6/test/connect-rep.c000066400000000000000000000103351461424365000165430ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Check that repeated 
IORING_OP_CONNECT to a socket without a listener keeps * yielding -ECONNREFUSED rather than -ECONNABORTED. Based on a reproducer * from: * * https://github.com/axboe/liburing/issues/828 * * and adapted to our usual test cases. Other changes were made as well, * like looping, using different ring types, adding a memset() for reuse, * etc. * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static unsigned long ud; static int init_test_server(struct sockaddr_in *serv_addr) { socklen_t servaddr_len = sizeof(struct sockaddr_in); int fd; /* Init server socket. Bind but don't listen */ fd = socket(AF_INET, SOCK_STREAM, 0); if (fd < 0) { perror("socket"); return -1; } serv_addr->sin_family = AF_INET; serv_addr->sin_addr.s_addr = inet_addr("127.0.0.1"); if (bind(fd, (struct sockaddr *) serv_addr, servaddr_len) < 0) { perror("bind"); return -1; } /* * Get the addresses the socket is bound to because the port is chosen * by the network stack. */ if (getsockname(fd, (struct sockaddr *)serv_addr, &servaddr_len) < 0) { perror("getsockname"); return -1; } return fd; } static int init_test_client(void) { socklen_t addr_len = sizeof(struct sockaddr_in); struct sockaddr_in client_addr = {}; int clientfd; clientfd = socket(AF_INET, SOCK_STREAM, 0); if (clientfd < 0) { perror("socket"); return -1; } client_addr.sin_family = AF_INET; client_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); if (bind(clientfd, (struct sockaddr *)&client_addr, addr_len) < 0) { perror("bind"); close(clientfd); return -1; } /* * Get the addresses the socket is bound to because the port is chosen * by the network stack. */ if (getsockname(clientfd, (struct sockaddr *)&client_addr, &addr_len) < 0) { perror("getsockname"); close(clientfd); return -1; } return clientfd; } static int get_completion_and_print(struct io_uring *ring) { struct io_uring_cqe *cqe; int ret, res; ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait_cqe=%d\n", ret); return -1; } /* Mark this completion as seen */ res = cqe->res; io_uring_cqe_seen(ring, cqe); return res; } static int test_connect(struct io_uring *ring, int clientfd, struct sockaddr_in *serv_addr) { struct sockaddr_in local_sa; struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); io_uring_prep_connect(sqe, clientfd, (const struct sockaddr *)serv_addr, sizeof(struct sockaddr_in)); sqe->user_data = ++ud; memcpy(&local_sa, serv_addr, sizeof(local_sa)); ret = io_uring_submit_and_wait(ring, 1); if (ret != 1) { fprintf(stderr, "submit=%d\n", ret); return T_EXIT_FAIL; } /* check for reuse at the same time */ memset(&local_sa, 0xff, sizeof(local_sa)); ret = get_completion_and_print(ring); if (ret != -ECONNREFUSED) { fprintf(stderr, "Connect got %d\n", ret); return T_EXIT_FAIL; } return T_EXIT_PASS; } static int test(int flags) { struct io_uring_params params = { .flags = flags, }; struct sockaddr_in serv_addr = {}; struct io_uring ring; int ret, clientfd, s_fd, i; if (flags & IORING_SETUP_SQPOLL) params.sq_thread_idle = 50; ret = io_uring_queue_init_params(8, &ring, &params); if (ret < 0) { fprintf(stderr, "Queue init: %d\n", ret); return T_EXIT_FAIL; } s_fd = init_test_server(&serv_addr); if (s_fd < 0) return T_EXIT_FAIL; clientfd = init_test_client(); if (clientfd < 0) { close(s_fd); return T_EXIT_FAIL; } /* make sure SQPOLL thread is sleeping */ if (flags & IORING_SETUP_SQPOLL) usleep(100000); for (i = 0; i < 32; i++) { ret = test_connect(&ring, clientfd, &serv_addr); if (ret == T_EXIT_SKIP) return T_EXIT_SKIP; else if (ret == T_EXIT_PASS)
continue; return T_EXIT_FAIL; } close(s_fd); close(clientfd); return T_EXIT_PASS; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test(0); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test(0) failed\n"); return T_EXIT_FAIL; } ret = test(IORING_SETUP_SQPOLL); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test(SQPOLL) failed\n"); return T_EXIT_FAIL; } return 0; } liburing-2.6/test/connect.c000066400000000000000000000206251461424365000157620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Check that IORING_OP_CONNECT works, with and without other side * being open. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_connect; static unsigned short use_port; static unsigned int use_addr; static int create_socket(void) { int fd; fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { perror("socket()"); return -1; } return fd; } static int submit_and_wait(struct io_uring *ring, int *res) { struct io_uring_cqe *cqe; int ret; ret = io_uring_submit_and_wait(ring, 1); if (ret != 1) { fprintf(stderr, "io_using_submit: got %d\n", ret); return 1; } ret = io_uring_peek_cqe(ring, &cqe); if (ret) { fprintf(stderr, "io_uring_peek_cqe(): no cqe returned"); return 1; } *res = cqe->res; io_uring_cqe_seen(ring, cqe); return 0; } static int wait_for(struct io_uring *ring, int fd, int mask) { struct io_uring_sqe *sqe; int ret, res; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "unable to get sqe\n"); return -1; } io_uring_prep_poll_add(sqe, fd, mask); sqe->user_data = 2; ret = submit_and_wait(ring, &res); if (ret) return -1; if (res < 0) { fprintf(stderr, "poll(): failed with %d\n", res); return -1; } return res; } static int listen_on_socket(int fd) { struct sockaddr_in addr; int ret; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_port = use_port; addr.sin_addr.s_addr = use_addr; ret = bind(fd, (struct sockaddr*)&addr, sizeof(addr)); if (ret == -1) { perror("bind()"); return -1; } ret = listen(fd, 128); if (ret == -1) { perror("listen()"); return -1; } return 0; } static int configure_connect(int fd, struct sockaddr_in* addr) { int ret, val = 1; ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); if (ret == -1) { perror("setsockopt()"); return -1; } ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); if (ret == -1) { perror("setsockopt()"); return -1; } memset(addr, 0, sizeof(*addr)); addr->sin_family = AF_INET; addr->sin_port = use_port; ret = inet_aton("127.0.0.1", &addr->sin_addr); return ret; } static int connect_socket(struct io_uring *ring, int fd, int *code, int async) { struct sockaddr_in addr; int ret, res; socklen_t code_len = sizeof(*code); struct io_uring_sqe *sqe; if (configure_connect(fd, &addr) == -1) return -1; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "unable to get sqe\n"); return -1; } io_uring_prep_connect(sqe, fd, (struct sockaddr*)&addr, sizeof(addr)); if (async) sqe->flags |= IOSQE_ASYNC; sqe->user_data = 1; ret = submit_and_wait(ring, &res); if (ret) return -1; if (res == -EINPROGRESS) { ret = wait_for(ring, fd, POLLOUT | POLLHUP | POLLERR); if (ret == -1) return -1; int ev = (ret & POLLOUT) || (ret & POLLHUP) || (ret & POLLERR); if (!ev) { fprintf(stderr, "poll(): returned invalid value %#x\n", ret); return -1; } ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, code, &code_len); if (ret == -1) { perror("getsockopt()"); return -1; } } else 
*code = res; return 0; } static int test_connect_with_no_peer(struct io_uring *ring) { int connect_fd; int ret, code; connect_fd = create_socket(); if (connect_fd == -1) return -1; ret = connect_socket(ring, connect_fd, &code, 0); if (ret == -1) goto err; if (code != -ECONNREFUSED) { if (code == -EINVAL || code == -EBADF || code == -EOPNOTSUPP) { fprintf(stdout, "No connect support, skipping\n"); no_connect = 1; goto out; } fprintf(stderr, "connect failed with %d\n", code); goto err; } out: close(connect_fd); return 0; err: close(connect_fd); return -1; } static int test_connect(struct io_uring *ring, int async) { int accept_fd; int connect_fd; int ret, code; accept_fd = create_socket(); if (accept_fd == -1) return -1; ret = listen_on_socket(accept_fd); if (ret == -1) goto err1; connect_fd = create_socket(); if (connect_fd == -1) goto err1; ret = connect_socket(ring, connect_fd, &code, async); if (ret == -1) goto err2; if (code != 0) { fprintf(stderr, "connect failed with %d\n", code); goto err2; } close(connect_fd); close(accept_fd); return 0; err2: close(connect_fd); err1: close(accept_fd); return -1; } static int test_connect_timeout(struct io_uring *ring) { int connect_fd[2] = {-1, -1}; int accept_fd = -1; int ret, code; struct sockaddr_in addr; struct io_uring_sqe *sqe; struct __kernel_timespec ts = {.tv_sec = 0, .tv_nsec = 100000}; struct stat sb; /* * Test reliably fails if syncookies isn't enabled */ if (stat("/proc/sys/net/ipv4/tcp_syncookies", &sb) < 0) return T_EXIT_SKIP; connect_fd[0] = create_socket(); if (connect_fd[0] == -1) return -1; connect_fd[1] = create_socket(); if (connect_fd[1] == -1) goto err; accept_fd = create_socket(); if (accept_fd == -1) goto err; if (configure_connect(connect_fd[0], &addr) == -1) goto err; if (configure_connect(connect_fd[1], &addr) == -1) goto err; ret = bind(accept_fd, (struct sockaddr*)&addr, sizeof(addr)); if (ret == -1) { perror("bind()"); goto err; } ret = listen(accept_fd, 0); // no backlog in order to block connect_fd[1] if (ret == -1) { perror("listen()"); goto err; } // We first connect with one client socket in order to fill the accept queue. ret = connect_socket(ring, connect_fd[0], &code, 0); if (ret == -1 || code != 0) { fprintf(stderr, "unable to connect\n"); goto err; } // We do not offload completion events from listening socket on purpose. // This way we create a state where the second connect request being stalled by OS. sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "unable to get sqe\n"); goto err; } io_uring_prep_connect(sqe, connect_fd[1], (struct sockaddr*)&addr, sizeof(addr)); sqe->user_data = 1; sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "unable to get sqe\n"); goto err; } io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret != 2) { fprintf(stderr, "submitted %d\n", ret); return -1; } for (int i = 0; i < 2; i++) { int expected; struct io_uring_cqe *cqe; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return -1; } expected = (cqe->user_data == 1) ? 
-ECANCELED : -ETIME; if (expected != cqe->res) { fprintf(stderr, "cqe %d, res %d, wanted %d\n", (int)cqe->user_data, cqe->res, expected); goto err; } io_uring_cqe_seen(ring, cqe); } close(connect_fd[0]); close(connect_fd[1]); close(accept_fd); return 0; err: if (connect_fd[0] != -1) close(connect_fd[0]); if (connect_fd[1] != -1) close(connect_fd[1]); if (accept_fd != -1) close(accept_fd); return -1; } static int test(int flags) { struct io_uring ring; int ret; ret = io_uring_queue_init(8, &ring, flags); if (ret) { fprintf(stderr, "io_uring_queue_setup() = %d\n", ret); return T_EXIT_FAIL; } srand(getpid()); use_port = (rand() % 61440) + 4096; use_port = htons(use_port); use_addr = inet_addr("127.0.0.1"); ret = test_connect_with_no_peer(&ring); if (ret == -1) { fprintf(stderr, "test_connect_with_no_peer(): failed\n"); return T_EXIT_FAIL; } if (no_connect) return T_EXIT_SKIP; ret = test_connect(&ring, 0); if (ret == -1) { fprintf(stderr, "test_connect(): failed\n"); return T_EXIT_FAIL; } ret = test_connect(&ring, 1); if (ret == -1) { fprintf(stderr, "test_connect(): failed\n"); return T_EXIT_FAIL; } ret = test_connect_timeout(&ring); if (ret == -1) { fprintf(stderr, "test_connect_timeout(): failed\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); return T_EXIT_PASS; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test(0); if (ret == -1) { fprintf(stderr, "test 0 failed\n"); return T_EXIT_FAIL; } if (no_connect) return T_EXIT_SKIP; ret = test(IORING_SETUP_SQPOLL); if (ret == -1) { fprintf(stderr, "test SQPOLL failed\n"); return T_EXIT_FAIL; } ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN); if (ret == -1) { fprintf(stderr, "test DEFER failed\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/coredump.c000066400000000000000000000021141461424365000161400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: trigger segfault. A recent 6.4-rc kernel introduced a bug * via vhost where segfaults for applications using io_uring * would hang in D state forever upon trying to generate the * core file. Perform a trivial test where a child process * generates a NULL pointer dereference and ensure that we don't * hang. 
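* * The child arms an IOSQE_ASYNC read on a pipe that is never written to, so * an inflight request exists at the moment of the NULL dereference; that * inflight-request-during-coredump state is what used to leave the task stuck * in D state.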
* */ #include #include #include #include #include "liburing.h" #include "helpers.h" static void test(void) { struct io_uring_sqe *sqe; struct io_uring ring; int *ptr = NULL; int fds[2]; char r1; if (pipe(fds) < 0) { perror("pipe"); exit(0); } io_uring_queue_init(8, &ring, 0); sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fds[0], &r1, sizeof(r1), 0); sqe->flags = IOSQE_ASYNC; sqe->user_data = 1; io_uring_submit(&ring); *ptr = 0; exit(0); } int main(int argc, char *argv[]) { pid_t pid; int wstat; pid = fork(); if (pid < 0) { perror("fork"); return T_EXIT_SKIP; } else if (!pid) { test(); } wait(&wstat); return T_EXIT_PASS; } liburing-2.6/test/cq-full.c000066400000000000000000000032131461424365000156660ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test CQ ring overflow * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int queue_n_nops(struct io_uring *ring, int n) { struct io_uring_sqe *sqe; int i, ret; for (i = 0; i < n; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); } ret = io_uring_submit(ring); if (ret < n) { printf("Submitted only %d\n", ret); goto err; } else if (ret < 0) { printf("sqe submit failed: %d\n", ret); goto err; } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring_cqe *cqe; struct io_uring_params p; struct io_uring ring; int i, ret; if (argc > 1) return T_EXIT_SKIP; memset(&p, 0, sizeof(p)); ret = io_uring_queue_init_params(4, &ring, &p); if (ret) { printf("ring setup failed\n"); return T_EXIT_FAIL; } if (queue_n_nops(&ring, 4)) goto err; if (queue_n_nops(&ring, 4)) goto err; if (queue_n_nops(&ring, 4)) goto err; i = 0; do { ret = io_uring_peek_cqe(&ring, &cqe); if (ret < 0) { if (ret == -EAGAIN) break; printf("wait completion %d\n", ret); goto err; } io_uring_cqe_seen(&ring, cqe); if (!cqe) break; i++; } while (1); if (i < 8 || ((*ring.cq.koverflow != 4) && !(p.features & IORING_FEAT_NODROP))) { printf("CQ overflow fail: %d completions, %u overflow\n", i, *ring.cq.koverflow); goto err; } io_uring_queue_exit(&ring); return T_EXIT_PASS; err: io_uring_queue_exit(&ring); return T_EXIT_FAIL; } liburing-2.6/test/cq-overflow.c000066400000000000000000000270451461424365000166000ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various CQ ring overflow tests * */ #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE (256 * 1024) #define BS 4096 #define BUFFERS (FILE_SIZE / BS) static struct iovec *vecs; #define ENTRIES 8 /* * io_uring has rare cases where CQEs are lost. * This happens when there is no space in the CQ ring, and also there is no * GFP_ATOMIC memory available. In reality this probably means that the process * is about to be killed as many other things might start failing, but we still * want to test that liburing and the kernel deal with this properly. The fault * injection framework allows us to test this scenario. Unfortunately this * requires some system wide changes and so we do not enable this by default. * The tests in this file should work in both cases (where overflows are queued * and where they are dropped) on recent kernels. 
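* * (When the kernel does drop completions, it reports this by failing a CQ * wait with -EBADR and bumping the ring's overflow counter; the reap loops * below treat -EBADR as the cqe_dropped case rather than as a test failure.)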
* * In order to test dropped CQEs you should enable fault injection in the kernel * config: * * CONFIG_FAULT_INJECTION=y * CONFIG_FAILSLAB=y * CONFIG_FAULT_INJECTION_DEBUG_FS=y * * and then run the test as follows: * echo Y > /sys/kernel/debug/failslab/task-filter * echo 100 > /sys/kernel/debug/failslab/probability * echo 0 > /sys/kernel/debug/failslab/verbose * echo 100000 > /sys/kernel/debug/failslab/times * bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t" */ static int test_io(const char *file, unsigned long usecs, unsigned *drops, int fault) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring_params p; unsigned reaped, total; struct io_uring ring; int nodrop, i, fd, ret; bool cqe_dropped = false; fd = open(file, O_RDONLY | O_DIRECT); if (fd < 0) { if (errno == EINVAL) return T_EXIT_SKIP; perror("file open"); return T_EXIT_FAIL; } memset(&p, 0, sizeof(p)); ret = io_uring_queue_init_params(ENTRIES, &ring, &p); if (ret) { close(fd); fprintf(stderr, "ring create failed: %d\n", ret); return T_EXIT_FAIL; } nodrop = 0; if (p.features & IORING_FEAT_NODROP) nodrop = 1; total = 0; for (i = 0; i < BUFFERS / 2; i++) { off_t offset; sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } offset = BS * (rand() % BUFFERS); if (fault && i == ENTRIES + 4) vecs[i].iov_base = NULL; io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); ret = io_uring_submit(&ring); if (nodrop && ret == -EBUSY) { *drops = 1; total = i; break; } else if (ret != 1) { fprintf(stderr, "submit got %d, wanted %d\n", ret, 1); total = i; break; } total++; } if (*drops) goto reap_it; usleep(usecs); for (i = total; i < BUFFERS; i++) { off_t offset; sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } offset = BS * (rand() % BUFFERS); io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); ret = io_uring_submit(&ring); if (nodrop && ret == -EBUSY) { *drops = 1; break; } else if (ret != 1) { fprintf(stderr, "submit got %d, wanted %d\n", ret, 1); break; } total++; } reap_it: reaped = 0; do { if (nodrop && !cqe_dropped) { /* nodrop should never lose events unless cqe_dropped */ if (reaped == total) break; } else { if (reaped + *ring.cq.koverflow == total) break; } ret = io_uring_wait_cqe(&ring, &cqe); if (nodrop && ret == -EBADR) { cqe_dropped = true; continue; } else if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); goto err; } if (cqe->res != BS) { if (!(fault && cqe->res == -EFAULT)) { fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS); goto err; } } io_uring_cqe_seen(&ring, cqe); reaped++; } while (1); if (!io_uring_peek_cqe(&ring, &cqe)) { fprintf(stderr, "found unexpected completion\n"); goto err; } if (!nodrop || cqe_dropped) { *drops = *ring.cq.koverflow; } else if (*ring.cq.koverflow) { fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow); goto err; } io_uring_queue_exit(&ring); close(fd); return T_EXIT_PASS; err: if (fd != -1) close(fd); io_uring_queue_exit(&ring); return T_EXIT_SKIP; } static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait) { struct io_uring_cqe *cqe; int i, ret = 0, seq = 0; unsigned int start_overflow = *ring->cq.koverflow; bool dropped = false; for (i = 0; i < nr_events; i++) { if (do_wait) ret = io_uring_wait_cqe(ring, &cqe); else ret = io_uring_peek_cqe(ring, &cqe); if (do_wait && ret == -EBADR) { unsigned int this_drop = *ring->cq.koverflow - start_overflow; dropped = true; start_overflow = *ring->cq.koverflow; assert(this_drop > 0); i += (this_drop - 1); continue; } 
else if (ret) { if (ret != -EAGAIN) fprintf(stderr, "cqe peek failed: %d\n", ret); break; } if (!dropped && cqe->user_data != seq) { fprintf(stderr, "cqe sequence out-of-order\n"); fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data, seq); return -EINVAL; } seq++; io_uring_cqe_seen(ring, cqe); } return i ? i : ret; } /* * Submit some NOPs and watch if the overflow is correct */ static int test_overflow(void) { struct io_uring ring; struct io_uring_params p; struct io_uring_sqe *sqe; unsigned pending; int ret, i, j; memset(&p, 0, sizeof(p)); ret = io_uring_queue_init_params(4, &ring, &p); if (ret) { fprintf(stderr, "io_uring_queue_init failed %d\n", ret); return 1; } /* submit 4x4 SQEs, should overflow the ring by 8 */ pending = 0; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->user_data = (i * 4) + j; } ret = io_uring_submit(&ring); if (ret == 4) { pending += 4; continue; } if (p.features & IORING_FEAT_NODROP) { if (ret == -EBUSY) break; } fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } /* we should now have 8 completions ready */ ret = reap_events(&ring, pending, 0); if (ret < 0) goto err; if (!(p.features & IORING_FEAT_NODROP)) { if (*ring.cq.koverflow != 8) { fprintf(stderr, "cq ring overflow %d, expected 8\n", *ring.cq.koverflow); goto err; } } io_uring_queue_exit(&ring); return 0; err: io_uring_queue_exit(&ring); return 1; } static void submit_one_nop(struct io_uring *ring, int ud) { struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); assert(sqe); io_uring_prep_nop(sqe); sqe->user_data = ud; ret = io_uring_submit(ring); assert(ret == 1); } /* * Create an overflow condition and ensure that SQEs are still processed */ static int test_overflow_handling(bool batch, int cqe_multiple, bool poll, bool defer) { struct io_uring ring; struct io_uring_params p; int ret, i, j, ud, cqe_count; unsigned int count; int const N = 8; int const LOOPS = 128; int const QUEUE_LENGTH = 1024; int completions[N]; int queue[QUEUE_LENGTH]; int queued = 0; int outstanding = 0; bool cqe_dropped = false; memset(&completions, 0, sizeof(int) * N); memset(&p, 0, sizeof(p)); p.cq_entries = 2 * cqe_multiple; p.flags |= IORING_SETUP_CQSIZE; if (poll) p.flags |= IORING_SETUP_IOPOLL; if (defer) p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN; ret = io_uring_queue_init_params(2, &ring, &p); if (ret) { fprintf(stderr, "io_uring_queue_init failed %d\n", ret); return 1; } assert(p.cq_entries < N); /* submit N SQEs, some should overflow */ for (i = 0; i < N; i++) { submit_one_nop(&ring, i); outstanding++; } for (i = 0; i < LOOPS; i++) { struct io_uring_cqe *cqes[N]; if (io_uring_cq_has_overflow(&ring)) { /* * Flush any overflowed CQEs and process those. Actively * flush these to make sure CQEs arrive in vague order * of being sent. */ ret = io_uring_get_events(&ring); if (ret != 0) { fprintf(stderr, "io_uring_get_events returned %d\n", ret); goto err; } } else if (!cqe_dropped) { for (j = 0; j < queued; j++) { submit_one_nop(&ring, queue[j]); outstanding++; } queued = 0; } /* We have lost some random cqes, stop if no remaining. 
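* ("outstanding" counts submitted-but-unreaped nops; once the kernel's * overflow counter accounts for all of them, there is nothing left to wait * for.)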
*/ if (cqe_dropped && outstanding == *ring.cq.koverflow) break; ret = io_uring_wait_cqe(&ring, &cqes[0]); if (ret == -EBADR) { cqe_dropped = true; fprintf(stderr, "CQE dropped\n"); continue; } else if (ret != 0) { fprintf(stderr, "io_uring_wait_cqes failed %d\n", ret); goto err; } cqe_count = 1; if (batch) { ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2); if (ret < 0) { fprintf(stderr, "io_uring_peek_batch_cqe failed %d\n", ret); goto err; } cqe_count = ret; } for (j = 0; j < cqe_count; j++) { assert(cqes[j]->user_data < N); ud = cqes[j]->user_data; completions[ud]++; assert(queued < QUEUE_LENGTH); queue[queued++] = (int)ud; } io_uring_cq_advance(&ring, cqe_count); outstanding -= cqe_count; } /* See if there were any drops by flushing the CQ ring *and* overflow */ do { struct io_uring_cqe *cqe; ret = io_uring_get_events(&ring); if (ret < 0) { if (ret == -EBADR) { fprintf(stderr, "CQE dropped\n"); cqe_dropped = true; break; } goto err; } if (outstanding && !io_uring_cq_ready(&ring)) ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL); if (ret && ret != -ETIME) { if (ret == -EBADR) { fprintf(stderr, "CQE dropped\n"); cqe_dropped = true; break; } fprintf(stderr, "wait_cqe_timeout = %d\n", ret); goto err; } count = io_uring_cq_ready(&ring); io_uring_cq_advance(&ring, count); outstanding -= count; } while (count); io_uring_queue_exit(&ring); /* Make sure that completions come back in the same order they were * sent. If they come back unfairly then this will concentrate on a * couple of indices. */ for (i = 1; !cqe_dropped && i < N; i++) { if (abs(completions[i] - completions[i - 1]) > 1) { fprintf(stderr, "bad completion size %d %d\n", completions[i], completions[i - 1]); goto err; } } return 0; err: io_uring_queue_exit(&ring); return 1; } int main(int argc, char *argv[]) { const char *fname = ".cq-overflow"; unsigned iters, drops; unsigned long usecs; int ret; int i; bool can_defer; if (argc > 1) return T_EXIT_SKIP; can_defer = t_probe_defer_taskrun(); for (i = 0; i < 16; i++) { bool batch = i & 1; int mult = (i & 2) ? 
1 : 2; bool poll = i & 4; bool defer = i & 8; if (defer && !can_defer) continue; ret = test_overflow_handling(batch, mult, poll, defer); if (ret) { fprintf(stderr, "test_overflow_handling(" "batch=%d, mult=%d, poll=%d, defer=%d) failed\n", batch, mult, poll, defer); goto err; } } ret = test_overflow(); if (ret) { fprintf(stderr, "test_overflow failed\n"); return ret; } t_create_file(fname, FILE_SIZE); vecs = t_create_buffers(BUFFERS, BS); iters = 0; usecs = 1000; do { drops = 0; ret = test_io(fname, usecs, &drops, 0); if (ret == T_EXIT_SKIP) break; else if (ret != T_EXIT_PASS) { fprintf(stderr, "test_io nofault failed\n"); goto err; } if (drops) break; usecs = (usecs * 12) / 10; iters++; } while (iters < 40); if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) { fprintf(stderr, "test_io nofault failed\n"); goto err; } if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) { fprintf(stderr, "test_io fault failed\n"); goto err; } unlink(fname); return T_EXIT_PASS; err: unlink(fname); return T_EXIT_FAIL; } liburing-2.6/test/cq-peek-batch.c000066400000000000000000000036401461424365000167330ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test CQ peek-batch * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int queue_n_nops(struct io_uring *ring, int n, int offset) { struct io_uring_sqe *sqe; int i, ret; for (i = 0; i < n; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->user_data = i + offset; } ret = io_uring_submit(ring); if (ret < n) { printf("Submitted only %d\n", ret); goto err; } else if (ret < 0) { printf("sqe submit failed: %d\n", ret); goto err; } return 0; err: return 1; } #define CHECK_BATCH(ring, got, cqes, count, expected) do {\ got = io_uring_peek_batch_cqe((ring), cqes, count);\ if (got != expected) {\ printf("Got %d CQs, expected %d\n", got, expected);\ goto err;\ }\ } while(0) int main(int argc, char *argv[]) { struct io_uring_cqe *cqes[8]; struct io_uring ring; int ret, i; unsigned got; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(4, &ring, 0); if (ret) { printf("ring setup failed\n"); return T_EXIT_FAIL; } CHECK_BATCH(&ring, got, cqes, 4, 0); if (queue_n_nops(&ring, 4, 0)) goto err; CHECK_BATCH(&ring, got, cqes, 4, 4); for (i=0;i<4;i++) { if (i != cqes[i]->user_data) { printf("Got user_data %" PRIu64 ", expected %d\n", (uint64_t) cqes[i]->user_data, i); goto err; } } if (queue_n_nops(&ring, 4, 4)) goto err; io_uring_cq_advance(&ring, 4); CHECK_BATCH(&ring, got, cqes, 4, 4); for (i=0;i<4;i++) { if (i + 4 != cqes[i]->user_data) { printf("Got user_data %" PRIu64 ", expected %d\n", (uint64_t) cqes[i]->user_data, i + 4); goto err; } } io_uring_cq_advance(&ring, 8); io_uring_queue_exit(&ring); return T_EXIT_PASS; err: io_uring_queue_exit(&ring); return T_EXIT_FAIL; } liburing-2.6/test/cq-ready.c000066400000000000000000000031231461424365000160300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test CQ ready * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int queue_n_nops(struct io_uring *ring, int n) { struct io_uring_sqe *sqe; int i, ret; for (i = 0; i < n; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); } ret = io_uring_submit(ring); if (ret < n) { printf("Submitted only %d\n", ret); goto err; } else if (ret < 0) { printf("sqe submit failed: %d\n", 
ret); goto err; } return 0; err: return 1; } #define CHECK_READY(ring, expected) do {\ ready = io_uring_cq_ready((ring));\ if (ready != expected) {\ printf("Got %d CQs ready, expected %d\n", ready, expected);\ goto err;\ }\ } while(0) int main(int argc, char *argv[]) { struct io_uring ring; int ret; unsigned ready; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(4, &ring, 0); if (ret) { printf("ring setup failed\n"); return T_EXIT_FAIL; } CHECK_READY(&ring, 0); if (queue_n_nops(&ring, 4)) goto err; CHECK_READY(&ring, 4); io_uring_cq_advance(&ring, 4); CHECK_READY(&ring, 0); if (queue_n_nops(&ring, 4)) goto err; CHECK_READY(&ring, 4); io_uring_cq_advance(&ring, 1); CHECK_READY(&ring, 3); io_uring_cq_advance(&ring, 2); CHECK_READY(&ring, 1); io_uring_cq_advance(&ring, 1); CHECK_READY(&ring, 0); io_uring_queue_exit(&ring); return T_EXIT_PASS; err: io_uring_queue_exit(&ring); return T_EXIT_FAIL; } liburing-2.6/test/cq-size.c000066400000000000000000000023431461424365000157010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test CQ ring sizing */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring_params p; struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; memset(&p, 0, sizeof(p)); p.flags = IORING_SETUP_CQSIZE; p.cq_entries = 64; ret = io_uring_queue_init_params(4, &ring, &p); if (ret) { if (ret == -EINVAL) { printf("Skipped, not supported on this kernel\n"); goto done; } printf("ring setup failed\n"); return T_EXIT_FAIL; } if (p.cq_entries < 64) { printf("cq entries invalid (%d)\n", p.cq_entries); goto err; } io_uring_queue_exit(&ring); memset(&p, 0, sizeof(p)); p.flags = IORING_SETUP_CQSIZE; p.cq_entries = 0; ret = io_uring_queue_init_params(4, &ring, &p); if (ret >= 0) { printf("zero sized cq ring succeeded\n"); io_uring_queue_exit(&ring); goto err; } if (ret != -EINVAL) { printf("io_uring_queue_init_params failed, but not with -EINVAL" ", returned error %d (%s)\n", ret, strerror(-ret)); goto err; } done: return T_EXIT_PASS; err: return T_EXIT_FAIL; } liburing-2.6/test/d4ae271dfaae.c000066400000000000000000000035171461424365000163620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test case for SQPOLL missing a 'ret' clear in case of busy. 
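* * The submit loop below issues ten fixed-file reads with a 1ms sleep after * each submission, so the SQPOLL thread alternates between busy and idle; * every completion must still return a full 4096 bytes.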
* * Heavily based on a test case from * Xiaoguang Wang */ #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE (128 * 1024) int main(int argc, char *argv[]) { struct io_uring ring; int i, fd, ret; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec *iovecs; struct io_uring_params p; char *fname; void *buf; memset(&p, 0, sizeof(p)); p.flags = IORING_SETUP_SQPOLL; ret = t_create_ring_params(16, &ring, &p); if (ret == T_SETUP_SKIP) return T_EXIT_SKIP; else if (ret < 0) return T_EXIT_FAIL; if (argc > 1) { fname = argv[1]; } else { fname = ".sqpoll.tmp"; t_create_file(fname, FILE_SIZE); } fd = open(fname, O_RDONLY | O_DIRECT); if (fname != argv[1]) unlink(fname); if (fd < 0) { perror("open"); goto out; } iovecs = t_calloc(10, sizeof(struct iovec)); for (i = 0; i < 10; i++) { t_posix_memalign(&buf, 4096, 4096); iovecs[i].iov_base = buf; iovecs[i].iov_len = 4096; } ret = io_uring_register_files(&ring, &fd, 1); if (ret < 0) { fprintf(stderr, "register files %d\n", ret); goto out; } for (i = 0; i < 10; i++) { sqe = io_uring_get_sqe(&ring); if (!sqe) break; io_uring_prep_readv(sqe, 0, &iovecs[i], 1, 0); sqe->flags |= IOSQE_FIXED_FILE; ret = io_uring_submit(&ring); usleep(1000); } for (i = 0; i < 10; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); break; } if (cqe->res != 4096) { fprintf(stderr, "ret=%d, wanted 4096\n", cqe->res); ret = 1; break; } io_uring_cqe_seen(&ring, cqe); } close(fd); out: io_uring_queue_exit(&ring); return ret; } liburing-2.6/test/d77a67ed5f27.c000066400000000000000000000024031461424365000161570ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include "liburing.h" #include "helpers.h" static void sig_alrm(int sig) { fprintf(stderr, "Timed out!\n"); exit(1); } int main(int argc, char *argv[]) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring_params p; struct io_uring ring; int ret, data; if (argc > 1) return T_EXIT_SKIP; signal(SIGALRM, sig_alrm); memset(&p, 0, sizeof(p)); p.sq_thread_idle = 100; p.flags = IORING_SETUP_SQPOLL; ret = t_create_ring_params(4, &ring, &p); if (ret == T_SETUP_SKIP) return T_EXIT_SKIP; else if (ret < 0) return T_EXIT_FAIL; /* make sure sq thread is sleeping at this point */ usleep(150000); alarm(1); sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); return T_EXIT_FAIL; } io_uring_prep_nop(sqe); io_uring_sqe_set_data(sqe, (void *) (unsigned long) 42); io_uring_submit_and_wait(&ring, 1); ret = io_uring_peek_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "cqe get failed\n"); return 1; } data = (unsigned long) io_uring_cqe_get_data(cqe); if (data != 42) { fprintf(stderr, "invalid data: %d\n", data); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/defer-taskrun.c000066400000000000000000000201251461424365000170760ustar00rootroot00000000000000// SPDX-License-Identifier: MIT #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "test.h" #include "helpers.h" #define EXEC_FILENAME ".defer-taskrun" #define EXEC_FILESIZE (1U<<20) static bool can_read_t(int fd, int time) { int ret; struct pollfd p = { .fd = fd, .events = POLLIN, }; ret = poll(&p, 1, time); return ret == 1; } static bool can_read(int fd) { return can_read_t(fd, 0); } static void eventfd_clear(int fd) { uint64_t val; int ret; assert(can_read(fd)); ret = read(fd, &val, 8); assert(ret == 
8); } static void eventfd_trigger(int fd) { uint64_t val = 1; int ret; ret = write(fd, &val, sizeof(val)); assert(ret == sizeof(val)); } #define CHECK(x) \ do { \ if (!(x)) { \ fprintf(stderr, "%s:%d %s failed\n", __FILE__, __LINE__, #x); \ return -1; \ } \ } while (0) static int test_eventfd(void) { struct io_uring ring; int ret; int fda, fdb; struct io_uring_cqe *cqe; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN); if (ret) return ret; fda = eventfd(0, EFD_NONBLOCK); fdb = eventfd(0, EFD_NONBLOCK); CHECK(fda >= 0 && fdb >= 0); ret = io_uring_register_eventfd(&ring, fda); if (ret) return ret; CHECK(!can_read(fda)); CHECK(!can_read(fdb)); io_uring_prep_poll_add(io_uring_get_sqe(&ring), fdb, POLLIN); io_uring_submit(&ring); CHECK(!can_read(fda)); /* poll should not have completed */ io_uring_prep_nop(io_uring_get_sqe(&ring)); io_uring_submit(&ring); CHECK(can_read(fda)); /* nop should have */ CHECK(io_uring_peek_cqe(&ring, &cqe) == 0); CHECK(cqe->res == 0); io_uring_cqe_seen(&ring, cqe); eventfd_clear(fda); eventfd_trigger(fdb); /* can take time due to rcu_call */ CHECK(can_read_t(fda, 1000)); /* should not have processed the cqe yet */ CHECK(io_uring_cq_ready(&ring) == 0); io_uring_get_events(&ring); CHECK(io_uring_cq_ready(&ring) == 1); io_uring_queue_exit(&ring); return 0; } struct thread_data { struct io_uring ring; int efd; char buff[8]; }; static void *thread(void *t) { struct thread_data *td = t; io_uring_enable_rings(&td->ring); io_uring_prep_read(io_uring_get_sqe(&td->ring), td->efd, td->buff, sizeof(td->buff), 0); io_uring_submit(&td->ring); return NULL; } static int test_thread_shutdown(void) { pthread_t t1; int ret; struct thread_data td; struct io_uring_cqe *cqe; uint64_t val = 1; ret = io_uring_queue_init(8, &td.ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_R_DISABLED); if (ret) return ret; CHECK(io_uring_get_events(&td.ring) == -EBADFD); td.efd = eventfd(0, 0); CHECK(td.efd >= 0); CHECK(pthread_create(&t1, NULL, thread, &td) == 0); CHECK(pthread_join(t1, NULL) == 0); CHECK(io_uring_get_events(&td.ring) == -EEXIST); CHECK(write(td.efd, &val, sizeof(val)) == sizeof(val)); CHECK(io_uring_wait_cqe(&td.ring, &cqe) == -EEXIST); close(td.efd); io_uring_queue_exit(&td.ring); return 0; } static int test_exec(const char *filename) { int ret; int fd; struct io_uring ring; pid_t fork_pid; static char * const new_argv[] = {"1", "2", "3", NULL}; static char * const new_env[] = {NULL}; char *buff; fork_pid = fork(); CHECK(fork_pid >= 0); if (fork_pid > 0) { int wstatus; CHECK(waitpid(fork_pid, &wstatus, 0) != (pid_t)-1); if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) == T_EXIT_FAIL) { fprintf(stderr, "child failed %i\n", WEXITSTATUS(wstatus)); return -1; } return T_EXIT_PASS; } ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN); if (ret) return ret; if (filename) { fd = open(filename, O_RDONLY | O_DIRECT); if (fd < 0 && errno == EINVAL) return T_EXIT_SKIP; } else { t_create_file(EXEC_FILENAME, EXEC_FILESIZE); fd = open(EXEC_FILENAME, O_RDONLY | O_DIRECT); if (fd < 0 && errno == EINVAL) { unlink(EXEC_FILENAME); return T_EXIT_SKIP; } unlink(EXEC_FILENAME); } buff = (char*)malloc(EXEC_FILESIZE); CHECK(posix_memalign((void **)&buff, 4096, EXEC_FILESIZE) == 0); CHECK(buff); CHECK(fd >= 0); io_uring_prep_read(io_uring_get_sqe(&ring), fd, buff, EXEC_FILESIZE, 0); io_uring_submit(&ring); ret = execve("/proc/self/exe", new_argv, new_env); /* if we get here it failed anyway */ 
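	/*
	 * The O_DIRECT read above is deliberately left inflight across
	 * execve(): with DEFER_TASKRUN its completion sits as deferred task
	 * work, and the kernel has to cancel/run it at exec time without
	 * hanging the child (the parent's waitpid() checks exactly that).
	 */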
fprintf(stderr, "execve failed %d\n", ret); return T_EXIT_FAIL; } static int test_flag(void) { struct io_uring ring; int ret; int fd; struct io_uring_cqe *cqe; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_TASKRUN_FLAG); CHECK(!ret); fd = eventfd(0, EFD_NONBLOCK); CHECK(fd >= 0); io_uring_prep_poll_add(io_uring_get_sqe(&ring), fd, POLLIN); io_uring_submit(&ring); CHECK(!can_read(fd)); /* poll should not have completed */ eventfd_trigger(fd); CHECK(can_read(fd)); /* should not have processed the poll cqe yet */ CHECK(io_uring_cq_ready(&ring) == 0); /* flag should be set */ CHECK(IO_URING_READ_ONCE(*ring.sq.kflags) & IORING_SQ_TASKRUN); /* Specifically peek, knowing we have only no cqe * but because the flag is set, liburing should try and get more */ ret = io_uring_peek_cqe(&ring, &cqe); CHECK(ret == 0 && cqe); CHECK(!(IO_URING_READ_ONCE(*ring.sq.kflags) & IORING_SQ_TASKRUN)); close(fd); io_uring_queue_exit(&ring); return 0; } static int test_ring_shutdown(void) { struct io_uring ring; int ret; int fd[2]; char buff = '\0'; char send = 'X'; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_TASKRUN_FLAG); CHECK(!ret); ret = t_create_socket_pair(fd, true); CHECK(!ret); io_uring_prep_recv(io_uring_get_sqe(&ring), fd[0], &buff, 1, 0); io_uring_submit(&ring); ret = write(fd[1], &send, 1); CHECK(ret == 1); /* should not have processed the poll cqe yet */ CHECK(io_uring_cq_ready(&ring) == 0); io_uring_queue_exit(&ring); /* task work should have been processed by now */ CHECK(buff = 'X'); return 0; } static int test_drain(void) { struct io_uring ring; int ret, i, fd[2]; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec iovecs[128]; char buff[ARRAY_SIZE(iovecs)]; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_TASKRUN_FLAG); CHECK(!ret); for (i = 0; i < ARRAY_SIZE(iovecs); i++) { iovecs[i].iov_base = &buff[i]; iovecs[i].iov_len = 1; } ret = t_create_socket_pair(fd, true); CHECK(!ret); sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, fd[1], &iovecs[0], ARRAY_SIZE(iovecs), 0); sqe->flags |= IOSQE_IO_DRAIN; io_uring_submit(&ring); for (i = 0; i < ARRAY_SIZE(iovecs); i++) iovecs[i].iov_base = NULL; CHECK(io_uring_wait_cqe(&ring, &cqe) == 0); CHECK(cqe->res == 128); close(fd[0]); close(fd[1]); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { int ret; const char *filename = NULL; if (argc > 2) return T_EXIT_SKIP; if (argc == 2) { /* This test exposes interesting behaviour with a null-blk * device configured like: * $ modprobe null-blk completion_nsec=100000000 irqmode=2 * and then run with $ defer-taskrun.t /dev/nullb0 */ filename = argv[1]; } if (!t_probe_defer_taskrun()) return T_EXIT_SKIP; ret = test_thread_shutdown(); if (ret) { fprintf(stderr, "test_thread_shutdown failed\n"); return T_EXIT_FAIL; } ret = test_exec(filename); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_exec failed\n"); return T_EXIT_FAIL; } ret = test_eventfd(); if (ret) { fprintf(stderr, "eventfd failed\n"); return T_EXIT_FAIL; } ret = test_flag(); if (ret) { fprintf(stderr, "flag failed\n"); return T_EXIT_FAIL; } ret = test_ring_shutdown(); if (ret) { fprintf(stderr, "test_ring_shutdown failed\n"); return T_EXIT_FAIL; } ret = test_drain(); if (ret) { fprintf(stderr, "test_drain failed\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } 
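/*
 * Aside: a minimal sketch (not part of the liburing test suite; the helper
 * name is illustrative) of the DEFER_TASKRUN contract the tests above
 * exercise: completions are queued as deferred task work and only become
 * visible in the CQ ring once the submitting task itself reaps them, e.g.
 * via io_uring_get_events() or a wait.
 */
#include <errno.h>
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include "liburing.h"

static int defer_taskrun_sketch(void)
{
	struct io_uring ring;
	uint64_t val = 1;
	int ret, efd;

	ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
					    IORING_SETUP_DEFER_TASKRUN);
	if (ret)
		return ret;	/* -EINVAL on kernels without support */

	efd = eventfd(0, EFD_NONBLOCK);
	if (efd < 0) {
		io_uring_queue_exit(&ring);
		return -errno;
	}

	io_uring_prep_poll_add(io_uring_get_sqe(&ring), efd, POLLIN);
	io_uring_submit(&ring);

	/* trigger the poll; its CQE is queued as deferred task work */
	if (write(efd, &val, sizeof(val)) != sizeof(val))
		perror("write");

	/* the CQE only becomes visible once we explicitly reap */
	io_uring_get_events(&ring);

	close(efd);
	io_uring_queue_exit(&ring);
	return 0;
}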
liburing-2.6/test/defer-tw-timeout.c000066400000000000000000000061371461424365000175340ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test waiting for more events than what will be posted with * a timeout with DEFER_TASKRUN. All kernels should time out, * but a non-buggy kernel will end up with one CQE available * for reaping. Buggy kernels will not have processed the * task_work and will have 0 events. * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" struct d { int fd; }; static void *thread_fn(void *data) { struct d *d = data; int ret; usleep(100000); ret = write(d->fd, "Hello", 5); if (ret < 0) perror("write"); return NULL; } static int test_poll(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; int ret, fds[2], i; pthread_t thread; char buf[32]; struct d d; void *tret; if (pipe(fds) < 0) { perror("pipe"); return 1; } d.fd = fds[1]; sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); pthread_create(&thread, NULL, thread_fn, &d); ts.tv_sec = 1; ts.tv_nsec = 0; ret = io_uring_submit_and_wait_timeout(ring, &cqe, 2, &ts, NULL); if (ret != 1) { fprintf(stderr, "unexpected wait ret %d\n", ret); return T_EXIT_FAIL; } for (i = 0; i < 2; i++) { ret = io_uring_peek_cqe(ring, &cqe); if (ret) break; io_uring_cqe_seen(ring, cqe); } if (i != 1) { fprintf(stderr, "Got %d request, expected 1\n", i); return T_EXIT_FAIL; } pthread_join(thread, &tret); return T_EXIT_PASS; } static int test_file(struct io_uring *ring, char *__fname) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; char filename[64], *fname; int fd, ret, i; void *buf; if (!__fname) { fname = filename; sprintf(fname, ".defer-tw-timeout.%d", getpid()); t_create_file(fname, 128*1024); } else { fname = __fname; } fd = open(fname, O_RDONLY | O_DIRECT); if (fd < 0) { perror("open"); if (!__fname) unlink(fname); return T_EXIT_FAIL; } if (!__fname) unlink(fname); if (posix_memalign(&buf, 4096, 4096)) { close(fd); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, fd, buf, 4096, 0); ts.tv_sec = 1; ts.tv_nsec = 0; ret = io_uring_submit_and_wait_timeout(ring, &cqe, 2, &ts, NULL); if (ret != 1) { fprintf(stderr, "unexpected wait ret %d\n", ret); close(fd); return T_EXIT_FAIL; } for (i = 0; i < 2; i++) { ret = io_uring_peek_cqe(ring, &cqe); if (ret) break; io_uring_cqe_seen(ring, cqe); } if (i != 1) { fprintf(stderr, "Got %d request, expected 1\n", i); close(fd); return T_EXIT_FAIL; } close(fd); return T_EXIT_PASS; } int main(int argc, char *argv[]) { struct io_uring ring; char *fname = NULL; int ret; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN); if (ret == -EINVAL) return T_EXIT_SKIP; if (argc > 1) fname = argv[1]; ret = test_file(&ring, fname); if (ret != T_EXIT_PASS) return ret; ret = test_poll(&ring); if (ret != T_EXIT_PASS) return ret; return T_EXIT_PASS; } liburing-2.6/test/defer.c000066400000000000000000000132551461424365000154170ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define RING_SIZE 128 enum { OP_NOP, OP_REMOVE_BUFFERS }; struct test_context { struct io_uring *ring; struct io_uring_sqe **sqes; struct io_uring_cqe *cqes; int nr; }; static void free_context(struct test_context *ctx) { free(ctx->sqes); free(ctx->cqes); memset(ctx, 
0, sizeof(*ctx)); } static int init_context(struct test_context *ctx, struct io_uring *ring, int nr, int op) { struct io_uring_sqe *sqe; int i; memset(ctx, 0, sizeof(*ctx)); ctx->nr = nr; ctx->ring = ring; ctx->sqes = t_malloc(nr * sizeof(*ctx->sqes)); ctx->cqes = t_malloc(nr * sizeof(*ctx->cqes)); if (!ctx->sqes || !ctx->cqes) goto err; for (i = 0; i < nr; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) goto err; switch (op) { case OP_NOP: io_uring_prep_nop(sqe); break; case OP_REMOVE_BUFFERS: io_uring_prep_remove_buffers(sqe, 10, 1); break; } sqe->user_data = i; ctx->sqes[i] = sqe; } return 0; err: free_context(ctx); printf("init context failed\n"); return 1; } static int wait_cqes(struct test_context *ctx) { int ret, i; struct io_uring_cqe *cqe; for (i = 0; i < ctx->nr; i++) { ret = io_uring_wait_cqe(ctx->ring, &cqe); if (ret < 0) { printf("wait_cqes: wait completion %d\n", ret); return 1; } memcpy(&ctx->cqes[i], cqe, sizeof(*cqe)); io_uring_cqe_seen(ctx->ring, cqe); } return 0; } static int test_cancelled_userdata(struct io_uring *ring) { struct test_context ctx; int ret, i, nr = 100; if (init_context(&ctx, ring, nr, OP_NOP)) return 1; for (i = 0; i < nr; i++) ctx.sqes[i]->flags |= IOSQE_IO_LINK; ret = io_uring_submit(ring); if (ret <= 0) { printf("sqe submit failed: %d\n", ret); goto err; } if (wait_cqes(&ctx)) goto err; for (i = 0; i < nr; i++) { if (i != ctx.cqes[i].user_data) { printf("invalid user data\n"); goto err; } } free_context(&ctx); return 0; err: free_context(&ctx); return 1; } static int test_thread_link_cancel(struct io_uring *ring) { struct test_context ctx; int ret, i, nr = 100; if (init_context(&ctx, ring, nr, OP_REMOVE_BUFFERS)) return 1; for (i = 0; i < nr; i++) ctx.sqes[i]->flags |= IOSQE_IO_LINK; ret = io_uring_submit(ring); if (ret <= 0) { printf("sqe submit failed: %d\n", ret); goto err; } if (wait_cqes(&ctx)) goto err; for (i = 0; i < nr; i++) { bool fail = false; if (i == 0) fail = (ctx.cqes[i].res != -ENOENT); else fail = (ctx.cqes[i].res != -ECANCELED); if (fail) { printf("invalid status %d\n", ctx.cqes[i].res); goto err; } } free_context(&ctx); return 0; err: free_context(&ctx); return 1; } static int test_drain_with_linked_timeout(struct io_uring *ring) { const int nr = 3; struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0, }; struct test_context ctx; int ret, i; if (init_context(&ctx, ring, nr * 2, OP_NOP)) return 1; for (i = 0; i < nr; i++) { io_uring_prep_timeout(ctx.sqes[2 * i], &ts, 0, 0); ctx.sqes[2 * i]->flags |= IOSQE_IO_LINK | IOSQE_IO_DRAIN; io_uring_prep_link_timeout(ctx.sqes[2 * i + 1], &ts, 0); } ret = io_uring_submit(ring); if (ret <= 0) { printf("sqe submit failed: %d\n", ret); goto err; } if (wait_cqes(&ctx)) goto err; free_context(&ctx); return 0; err: free_context(&ctx); return 1; } static int run_drained(struct io_uring *ring, int nr) { struct test_context ctx; int ret, i; if (init_context(&ctx, ring, nr, OP_NOP)) return 1; for (i = 0; i < nr; i++) ctx.sqes[i]->flags |= IOSQE_IO_DRAIN; ret = io_uring_submit(ring); if (ret <= 0) { printf("sqe submit failed: %d\n", ret); goto err; } if (wait_cqes(&ctx)) goto err; free_context(&ctx); return 0; err: free_context(&ctx); return 1; } static int test_overflow_hung(struct io_uring *ring) { struct io_uring_sqe *sqe; int ret, nr = 10; while (*ring->cq.koverflow != 1000) { sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); return 1; } io_uring_prep_nop(sqe); ret = io_uring_submit(ring); if (ret <= 0) { printf("sqe submit failed: %d\n", ret); return 1; } } return 
run_drained(ring, nr); } static int test_dropped_hung(struct io_uring *ring) { int nr = 10; *ring->sq.kdropped = 1000; return run_drained(ring, nr); } int main(int argc, char *argv[]) { struct io_uring ring, poll_ring, sqthread_ring; struct io_uring_params p; int ret; if (argc > 1) return T_EXIT_SKIP; memset(&p, 0, sizeof(p)); ret = io_uring_queue_init_params(RING_SIZE, &ring, &p); if (ret) { printf("ring setup failed %i\n", ret); return T_EXIT_FAIL; } ret = io_uring_queue_init(RING_SIZE, &poll_ring, IORING_SETUP_IOPOLL); if (ret) { printf("poll_ring setup failed\n"); return T_EXIT_FAIL; } ret = test_cancelled_userdata(&poll_ring); if (ret) { printf("test_cancelled_userdata failed\n"); return ret; } if (!(p.features & IORING_FEAT_NODROP)) { ret = test_overflow_hung(&ring); if (ret) { printf("test_overflow_hung failed\n"); return ret; } } ret = test_dropped_hung(&ring); if (ret) { printf("test_dropped_hung failed\n"); return ret; } ret = test_drain_with_linked_timeout(&ring); if (ret) { printf("test_drain_with_linked_timeout failed\n"); return ret; } ret = t_create_ring(RING_SIZE, &sqthread_ring, IORING_SETUP_SQPOLL | IORING_SETUP_IOPOLL); if (ret == T_SETUP_SKIP) return T_EXIT_SKIP; else if (ret < 0) return T_EXIT_FAIL; ret = test_thread_link_cancel(&sqthread_ring); if (ret) { printf("test_thread_link_cancel failed\n"); return ret; } return T_EXIT_PASS; } liburing-2.6/test/double-poll-crash.c000066400000000000000000000144211461424365000176420ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ // https://syzkaller.appspot.com/bug?id=5c9918d20f771265ad0ffae3c8f3859d24850692 // autogenerated by syzkaller (https://github.com/google/syzkaller) #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" #define SIZEOF_IO_URING_SQE 64 #define SIZEOF_IO_URING_CQE 16 #define SQ_HEAD_OFFSET 0 #define SQ_TAIL_OFFSET 64 #define SQ_RING_MASK_OFFSET 256 #define SQ_RING_ENTRIES_OFFSET 264 #define SQ_FLAGS_OFFSET 276 #define SQ_DROPPED_OFFSET 272 #define CQ_HEAD_OFFSET 128 #define CQ_TAIL_OFFSET 192 #define CQ_RING_MASK_OFFSET 260 #define CQ_RING_ENTRIES_OFFSET 268 #define CQ_RING_OVERFLOW_OFFSET 284 #define CQ_FLAGS_OFFSET 280 #define CQ_CQES_OFFSET 320 static long syz_io_uring_setup(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5) { uint32_t entries = (uint32_t)a0; struct io_uring_params* setup_params = (struct io_uring_params*)a1; void* vma1 = (void*)a2; void* vma2 = (void*)a3; void** ring_ptr_out = (void**)a4; void** sqes_ptr_out = (void**)a5; uint32_t fd_io_uring = __sys_io_uring_setup(entries, setup_params); uint32_t sq_ring_sz = setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t); uint32_t cq_ring_sz = setup_params->cq_off.cqes + setup_params->cq_entries * SIZEOF_IO_URING_CQE; uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? 
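  /* the repro assumes IORING_FEAT_SINGLE_MMAP: one mapping at
   * IORING_OFF_SQ_RING serves both rings, so size it for the larger */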
sq_ring_sz : cq_ring_sz; *ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQ_RING); if (*ring_ptr_out == MAP_FAILED) exit(0); uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE; *sqes_ptr_out = mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES); if (*sqes_ptr_out == MAP_FAILED) exit(0); return fd_io_uring; } static long syz_io_uring_submit(volatile long a0, volatile long a1, volatile long a2, volatile long a3) { char* ring_ptr = (char*)a0; char* sqes_ptr = (char*)a1; char* sqe = (char*)a2; uint32_t sqes_index = (uint32_t)a3; uint32_t sq_ring_entries = *(uint32_t*)(ring_ptr + SQ_RING_ENTRIES_OFFSET); uint32_t cq_ring_entries = *(uint32_t*)(ring_ptr + CQ_RING_ENTRIES_OFFSET); uint32_t sq_array_off = (CQ_CQES_OFFSET + cq_ring_entries * SIZEOF_IO_URING_CQE + 63) & ~63; if (sq_ring_entries) sqes_index %= sq_ring_entries; char* sqe_dest = sqes_ptr + sqes_index * SIZEOF_IO_URING_SQE; memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE); uint32_t sq_ring_mask = *(uint32_t*)(ring_ptr + SQ_RING_MASK_OFFSET); uint32_t* sq_tail_ptr = (uint32_t*)(ring_ptr + SQ_TAIL_OFFSET); uint32_t sq_tail = *sq_tail_ptr & sq_ring_mask; uint32_t sq_tail_next = *sq_tail_ptr + 1; uint32_t* sq_array = (uint32_t*)(ring_ptr + sq_array_off); *(sq_array + sq_tail) = sqes_index; __atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE); return 0; } static long syz_open_dev(volatile long a0, volatile long a1, volatile long a2) { if (a0 == 0xc || a0 == 0xb) { char buf[128]; sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8_t)a1, (uint8_t)a2); return open(buf, O_RDWR, 0); } else { char buf[1024]; char* hash; strncpy(buf, (char*)a0, sizeof(buf) - 1); buf[sizeof(buf) - 1] = 0; while ((hash = strchr(buf, '#'))) { *hash = '0' + (char)(a1 % 10); a1 /= 10; } return open(buf, a2, 0); } } static uint64_t r[4] = {0xffffffffffffffff, 0x0, 0x0, 0xffffffffffffffff}; int main(int argc, char *argv[]) { void *mmap_ret; #if !defined(__i386) && !defined(__x86_64__) return T_EXIT_SKIP; #endif if (argc > 1) return T_EXIT_SKIP; mmap_ret = mmap((void *)0x20000000ul, 0x1000000ul, 7ul, MAP_ANON|MAP_PRIVATE, -1, 0ul); if (mmap_ret == MAP_FAILED) return T_EXIT_SKIP; mmap_ret = mmap((void *)0x21000000ul, 0x1000ul, 0ul, MAP_ANON|MAP_PRIVATE, -1, 0ul); if (mmap_ret == MAP_FAILED) return T_EXIT_SKIP; intptr_t res = 0; *(uint32_t*)0x20000484 = 0; *(uint32_t*)0x20000488 = 0; *(uint32_t*)0x2000048c = 0; *(uint32_t*)0x20000490 = 0; *(uint32_t*)0x20000498 = -1; *(uint32_t*)0x2000049c = 0; *(uint32_t*)0x200004a0 = 0; *(uint32_t*)0x200004a4 = 0; res = -1; res = syz_io_uring_setup(0x6ad4, 0x20000480, 0x20ee7000, 0x20ffb000, 0x20000180, 0x20000040); if (res != -1) { r[0] = res; r[1] = *(uint64_t*)0x20000180; r[2] = *(uint64_t*)0x20000040; } res = -1; res = syz_open_dev(0xc, 4, 0x15); if (res != -1) r[3] = res; *(uint8_t*)0x20000000 = 6; *(uint8_t*)0x20000001 = 0; *(uint16_t*)0x20000002 = 0; *(uint32_t*)0x20000004 = r[3]; *(uint64_t*)0x20000008 = 0; *(uint64_t*)0x20000010 = 0; *(uint32_t*)0x20000018 = 0; *(uint16_t*)0x2000001c = 0; *(uint16_t*)0x2000001e = 0; *(uint64_t*)0x20000020 = 0; *(uint16_t*)0x20000028 = 0; *(uint16_t*)0x2000002a = 0; *(uint8_t*)0x2000002c = 0; *(uint8_t*)0x2000002d = 0; *(uint8_t*)0x2000002e = 0; *(uint8_t*)0x2000002f = 0; *(uint8_t*)0x20000030 = 0; *(uint8_t*)0x20000031 = 0; *(uint8_t*)0x20000032 = 0; *(uint8_t*)0x20000033 = 0; *(uint8_t*)0x20000034 = 0; *(uint8_t*)0x20000035 = 0; 
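  /* Bytes 0x20000000-0x2000003f form one raw 64-byte SQE: opcode 6
   * (IORING_OP_POLL_ADD) at byte 0 and the device fd from r[3] at byte 4;
   * the remaining bytes below are zeroed. It is submitted through
   * syz_io_uring_submit() and raced against the ioctl on the same fd. */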
*(uint8_t*)0x20000036 = 0; *(uint8_t*)0x20000037 = 0; *(uint8_t*)0x20000038 = 0; *(uint8_t*)0x20000039 = 0; *(uint8_t*)0x2000003a = 0; *(uint8_t*)0x2000003b = 0; *(uint8_t*)0x2000003c = 0; *(uint8_t*)0x2000003d = 0; *(uint8_t*)0x2000003e = 0; *(uint8_t*)0x2000003f = 0; syz_io_uring_submit(r[1], r[2], 0x20000000, 0); __sys_io_uring_enter(r[0], 0x20450c, 0, 0ul, 0ul); *(uint32_t*)0x20000080 = 0x7ff; *(uint32_t*)0x20000084 = 0x8b7; *(uint32_t*)0x20000088 = 3; *(uint32_t*)0x2000008c = 0x101; *(uint8_t*)0x20000090 = 9; memcpy((void*)0x20000091, "\xaf\x09\x01\xbc\xf9\xc6\xe4\x92\x86\x51\x7d\x7f" "\xbd\x43\x7d\x16\x69\x3e\x05", 19); ioctl(r[3], 0x5404, 0x20000080ul); return T_EXIT_PASS; } liburing-2.6/test/drop-submit.c000066400000000000000000000030651461424365000165750ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test IORING_SETUP_SUBMIT_ALL * */ #include #include #include #include #include "liburing.h" #include "helpers.h" static int test(struct io_uring *ring, int expect_drops) { struct io_uring_sqe *sqe; char buf[32]; int ret, i; for (i = 0; i < 4; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); } /* prep two invalid reads, these will fail */ for (i = 0; i < 2; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_read(sqe, 128, buf, sizeof(buf), 0); sqe->ioprio = (short) -1; } ret = io_uring_submit(ring); if (expect_drops) { if (ret != 5) { fprintf(stderr, "drops submit failed: %d\n", ret); goto err; } } else { if (ret != 6) { fprintf(stderr, "no drops submit failed: %d\n", ret); goto err; } } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SUBMIT_ALL); if (ret) return 0; ret = test(&ring, 0); if (ret) { fprintf(stderr, "test no drops failed\n"); return ret; } io_uring_queue_exit(&ring); ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } ret = test(&ring, 1); if (ret) { fprintf(stderr, "test drops failed\n"); return ret; } return T_EXIT_PASS; } liburing-2.6/test/eeed8b54e0df.c000066400000000000000000000035601461424365000163740ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: -EAGAIN handling * */ #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define BLOCK 4096 #ifndef RWF_NOWAIT #define RWF_NOWAIT 8 #endif static int get_file_fd(void) { ssize_t ret; char *buf; int fd; fd = open("testfile", O_RDWR | O_CREAT, 0644); unlink("testfile"); if (fd < 0) { perror("open file"); return -1; } buf = t_malloc(BLOCK); memset(buf, 0, BLOCK); ret = write(fd, buf, BLOCK); if (ret != BLOCK) { if (ret < 0) perror("write"); else printf("Short write\n"); goto err; } fsync(fd); if (posix_fadvise(fd, 0, 4096, POSIX_FADV_DONTNEED)) { perror("fadvise"); err: close(fd); free(buf); return -1; } free(buf); return fd; } int main(int argc, char *argv[]) { struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec iov; int ret, fd; if (argc > 1) return T_EXIT_SKIP; iov.iov_base = t_malloc(4096); iov.iov_len = 4096; ret = io_uring_queue_init(2, &ring, 0); if (ret) { printf("ring setup failed\n"); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring); if (!sqe) { printf("get sqe failed\n"); return T_EXIT_FAIL; } fd = get_file_fd(); if (fd < 0) return T_EXIT_FAIL; 
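	/*
	 * get_file_fd() wrote one 4k block and then dropped it from the
	 * page cache via POSIX_FADV_DONTNEED, so this RWF_NOWAIT readv is
	 * expected to complete with -EAGAIN instead of blocking (4096 is
	 * also accepted below in case the page stayed cached).
	 */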
io_uring_prep_readv(sqe, fd, &iov, 1, 0); sqe->rw_flags = RWF_NOWAIT; ret = io_uring_submit(&ring); if (ret != 1) { printf("Got submit %d, expected 1\n", ret); goto err; } ret = io_uring_peek_cqe(&ring, &cqe); if (ret) { printf("Ring peek got %d\n", ret); goto err; } ret = T_EXIT_PASS; if (cqe->res != -EAGAIN && cqe->res != 4096) { if (cqe->res == -EOPNOTSUPP) { ret = T_EXIT_SKIP; } else { printf("cqe error: %d\n", cqe->res); goto err; } } close(fd); return ret; err: close(fd); return T_EXIT_FAIL; } liburing-2.6/test/empty-eownerdead.c000066400000000000000000000016401461424365000175760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test if entering with nothing to submit/wait for SQPOLL returns an error. */ #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" int main(int argc, char *argv[]) { struct io_uring_params p = {}; struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; p.flags = IORING_SETUP_SQPOLL; p.sq_thread_idle = 100; ret = t_create_ring_params(1, &ring, &p); if (ret == T_SETUP_SKIP) return T_EXIT_SKIP; else if (ret < 0) goto err; ret = __sys_io_uring_enter(ring.ring_fd, 0, 0, 0, NULL); if (ret < 0) { int __e = errno; if (__e == EOWNERDEAD) fprintf(stderr, "sqe submit unexpected failure due old kernel bug: %s\n", strerror(__e)); else fprintf(stderr, "sqe submit unexpected failure: %s\n", strerror(__e)); goto err; } return T_EXIT_PASS; err: return T_EXIT_FAIL; } liburing-2.6/test/eploop.c000066400000000000000000000031641461424365000156260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test that we don't recursively generate completion events if an io_uring * fd is added to an epoll context, and the ring itself polls for events on * the epollfd. Older kernels will stop on overflow, newer kernels will * detect this earlier and abort correctly. 
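 * The cycle: each posted CQE makes the ring fd readable, which makes the
 * epollfd readable, which completes the multishot poll and posts another
 * CQE.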
*/ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct epoll_event ev = { }; int epollfd, ret, i; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "Ring init failed: %d\n", ret); return T_EXIT_FAIL; } epollfd = epoll_create1(0); if (epollfd < 0) { perror("epoll_create"); return T_EXIT_FAIL; } ev.events = EPOLLIN; ev.data.fd = ring.ring_fd; ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, ring.ring_fd, &ev); if (ret < 0) { perror("epoll_ctl"); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_multishot(sqe, epollfd, POLLIN); sqe->user_data = 1; io_uring_submit(&ring); sqe = io_uring_get_sqe(&ring); sqe->user_data = 2; io_uring_prep_nop(sqe); io_uring_submit(&ring); for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe ret = %d\n", ret); break; } io_uring_cqe_seen(&ring, cqe); } ret = io_uring_peek_cqe(&ring, &cqe); if (!ret) { fprintf(stderr, "Generated too many events\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/eventfd-disable.c000066400000000000000000000066731461424365000173740ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test disable/enable notifications through eventfd * */ #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int test(bool defer) { struct io_uring_params p = {}; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; uint64_t ptr; struct iovec vec = { .iov_base = &ptr, .iov_len = sizeof(ptr) }; int ret, evfd, i; if (defer) p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN; ret = io_uring_queue_init_params(64, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } evfd = eventfd(0, EFD_CLOEXEC); if (evfd < 0) { perror("eventfd"); return T_EXIT_FAIL; } ret = io_uring_register_eventfd(&ring, evfd); if (ret) { fprintf(stderr, "failed to register evfd: %d\n", ret); return T_EXIT_FAIL; } if (!io_uring_cq_eventfd_enabled(&ring)) { fprintf(stderr, "eventfd disabled\n"); return T_EXIT_FAIL; } ret = io_uring_cq_eventfd_toggle(&ring, false); if (ret) { fprintf(stdout, "Skipping, CQ flags not available!\n"); return T_EXIT_SKIP; } sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, evfd, &vec, 1, 0); sqe->user_data = 1; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return T_EXIT_FAIL; } for (i = 0; i < 63; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); sqe->user_data = 2; } ret = io_uring_submit(&ring); if (ret != 63) { fprintf(stderr, "submit: %d\n", ret); return T_EXIT_FAIL; } for (i = 0; i < 63; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return T_EXIT_FAIL; } switch (cqe->user_data) { case 1: /* eventfd */ fprintf(stderr, "eventfd unexpected: %d\n", (int)ptr); return T_EXIT_FAIL; case 2: if (cqe->res) { fprintf(stderr, "nop: %d\n", cqe->res); return T_EXIT_FAIL; } break; } io_uring_cqe_seen(&ring, cqe); } ret = io_uring_cq_eventfd_toggle(&ring, true); if (ret) { fprintf(stderr, "io_uring_cq_eventfd_toggle: %d\n", ret); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); sqe->user_data = 2; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return 
T_EXIT_FAIL; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return T_EXIT_FAIL; } switch (cqe->user_data) { case 1: /* eventfd */ if (cqe->res != sizeof(ptr)) { fprintf(stderr, "read res: %d\n", cqe->res); return T_EXIT_FAIL; } if (ptr != 1) { fprintf(stderr, "eventfd: %d\n", (int)ptr); return T_EXIT_FAIL; } break; case 2: if (cqe->res) { fprintf(stderr, "nop: %d\n", cqe->res); return T_EXIT_FAIL; } break; } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); close(evfd); return T_EXIT_PASS; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test(false); if (ret != T_EXIT_PASS) { fprintf(stderr, "%s: test(false) failed\n", argv[0]); return ret; } if (t_probe_defer_taskrun()) { ret = test(true); if (ret != T_EXIT_PASS) { fprintf(stderr, "%s: test(true) failed\n", argv[0]); return ret; } } return ret; } liburing-2.6/test/eventfd-reg.c000066400000000000000000000031671461424365000165410ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test eventfd registration+unregistration * */ #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring_params p = {}; struct io_uring ring; int ret, evfd[2], i; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } evfd[0] = eventfd(0, EFD_CLOEXEC); evfd[1] = eventfd(0, EFD_CLOEXEC); if (evfd[0] < 0 || evfd[1] < 0) { perror("eventfd"); return T_EXIT_FAIL; } ret = io_uring_register_eventfd(&ring, evfd[0]); if (ret) { fprintf(stderr, "failed to register evfd: %d\n", ret); return T_EXIT_FAIL; } /* Check that registering again will get -EBUSY */ ret = io_uring_register_eventfd(&ring, evfd[1]); if (ret != -EBUSY) { fprintf(stderr, "unexpected 2nd register: %d\n", ret); return T_EXIT_FAIL; } close(evfd[1]); ret = io_uring_unregister_eventfd(&ring); if (ret) { fprintf(stderr, "unexpected unregister: %d\n", ret); return T_EXIT_FAIL; } /* loop 100 registers/unregister */ for (i = 0; i < 100; i++) { ret = io_uring_register_eventfd(&ring, evfd[0]); if (ret) { fprintf(stderr, "failed to register evfd: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_unregister_eventfd(&ring); if (ret) { fprintf(stderr, "unexpected unregister: %d\n", ret); return T_EXIT_FAIL; } } close(evfd[0]); return T_EXIT_PASS; } liburing-2.6/test/eventfd-ring.c000066400000000000000000000037301461424365000167170ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test use of eventfds with multiple rings * */ #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring_params p = {}; struct io_uring ring1, ring2; struct io_uring_sqe *sqe; int ret, evfd1, evfd2; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init_params(8, &ring1, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } if (!(p.features & IORING_FEAT_CUR_PERSONALITY)) { fprintf(stdout, "Skipping\n"); return T_EXIT_SKIP; } ret = io_uring_queue_init(8, &ring2, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } evfd1 = eventfd(0, EFD_CLOEXEC); if (evfd1 < 0) { perror("eventfd"); return T_EXIT_FAIL; } evfd2 = eventfd(0, EFD_CLOEXEC); if (evfd2 < 0) { perror("eventfd"); return 
T_EXIT_FAIL; } ret = io_uring_register_eventfd(&ring1, evfd1); if (ret) { fprintf(stderr, "failed to register evfd: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_register_eventfd(&ring2, evfd2); if (ret) { fprintf(stderr, "failed to register evfd: %d\n", ret); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring1); io_uring_prep_poll_add(sqe, evfd2, POLLIN); sqe->user_data = 1; sqe = io_uring_get_sqe(&ring2); io_uring_prep_poll_add(sqe, evfd1, POLLIN); sqe->user_data = 1; ret = io_uring_submit(&ring1); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_submit(&ring2); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring1); io_uring_prep_nop(sqe); sqe->user_data = 3; ret = io_uring_submit(&ring1); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/eventfd.c000066400000000000000000000043031461424365000157570ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various eventfd tests * */ #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring_params p = {}; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; uint64_t ptr; struct iovec vec = { .iov_base = &ptr, .iov_len = sizeof(ptr) }; int ret, evfd, i; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } if (!(p.features & IORING_FEAT_CUR_PERSONALITY)) { fprintf(stdout, "Skipping\n"); return T_EXIT_SKIP; } evfd = eventfd(0, EFD_CLOEXEC); if (evfd < 0) { perror("eventfd"); return T_EXIT_FAIL; } ret = io_uring_register_eventfd(&ring, evfd); if (ret) { fprintf(stderr, "failed to register evfd: %d\n", ret); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, evfd, POLLIN); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, evfd, &vec, 1, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 2; ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "submit: %d\n", ret); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); sqe->user_data = 3; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return T_EXIT_FAIL; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return T_EXIT_FAIL; } switch (cqe->user_data) { case 1: /* POLLIN */ if (cqe->res != 1) { fprintf(stderr, "poll: %d\n", cqe->res); return T_EXIT_FAIL; } break; case 2: if (cqe->res != sizeof(ptr)) { fprintf(stderr, "read: %d\n", cqe->res); return T_EXIT_FAIL; } break; case 3: if (cqe->res) { fprintf(stderr, "nop: %d\n", cqe->res); return T_EXIT_FAIL; } break; } io_uring_cqe_seen(&ring, cqe); } return T_EXIT_PASS; } liburing-2.6/test/evloop.c000066400000000000000000000031441461424365000156320ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test that we don't recursively generate completion events if an io_uring * has an eventfd registered that triggers on completions, and we add a poll * request with multishot on the eventfd. Older kernels will stop on overflow, * newer kernels will detect this earlier and abort correctly. 
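 * The cycle here is tighter than in eploop.c: the registered eventfd is
 * signalled for every posted CQE, and the multishot poll on that same
 * eventfd posts a new CQE each time it fires.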
*/ #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, efd, i; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "Ring init failed: %d\n", ret); return T_EXIT_FAIL; } efd = eventfd(0, 0); if (efd < 0) { perror("eventfd"); return T_EXIT_FAIL; } ret = io_uring_register_eventfd(&ring, efd); if (ret) { fprintf(stderr, "Ring eventfd register failed: %d\n", ret); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_multishot(sqe, efd, POLLIN); sqe->user_data = 1; io_uring_submit(&ring); sqe = io_uring_get_sqe(&ring); sqe->user_data = 2; io_uring_prep_nop(sqe); io_uring_submit(&ring); for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe ret = %d\n", ret); break; } io_uring_cqe_seen(&ring, cqe); } ret = io_uring_peek_cqe(&ring, &cqe); if (!ret) { fprintf(stderr, "Generated too many events\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/exec-target.c000066400000000000000000000001241461424365000165310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ int main(int argc, char *argv[]) { return 0; } liburing-2.6/test/exit-no-cleanup.c000066400000000000000000000045731461424365000173450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test case testing exit without cleanup and io-wq work pending or queued. * * From Florian Fischer * Link: https://lore.kernel.org/io-uring/20211202165606.mqryio4yzubl7ms5@pasture/ * */ #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define IORING_ENTRIES 8 static pthread_t *threads; static pthread_barrier_t init_barrier; static int sleep_fd, notify_fd; static sem_t sem; static void *thread_func(void *arg) { struct io_uring ring; int res; res = io_uring_queue_init(IORING_ENTRIES, &ring, 0); if (res) err(EXIT_FAILURE, "io_uring_queue_init failed"); pthread_barrier_wait(&init_barrier); for(;;) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; uint64_t buf; int res; sqe = io_uring_get_sqe(&ring); assert(sqe); io_uring_prep_read(sqe, sleep_fd, &buf, sizeof(buf), 0); res = io_uring_submit_and_wait(&ring, 1); if (res < 0) err(EXIT_FAILURE, "io_uring_submit_and_wait failed"); res = io_uring_peek_cqe(&ring, &cqe); assert(!res); if (cqe->res < 0) { errno = -cqe->res; err(EXIT_FAILURE, "read failed"); } assert(cqe->res == sizeof(buf)); sem_post(&sem); io_uring_cqe_seen(&ring, cqe); } return NULL; } int main(int argc, char *argv[]) { int res, fds[2], i, cpus; const uint64_t n = 0x42; if (argc > 1) return T_EXIT_SKIP; cpus = get_nprocs(); res = pthread_barrier_init(&init_barrier, NULL, cpus); if (res) err(EXIT_FAILURE, "pthread_barrier_init failed"); res = sem_init(&sem, 0, 0); if (res) err(EXIT_FAILURE, "sem_init failed"); threads = t_malloc(sizeof(pthread_t) * cpus); res = pipe(fds); if (res) err(EXIT_FAILURE, "pipe failed"); sleep_fd = fds[0]; notify_fd = fds[1]; for (i = 0; i < cpus; i++) { errno = pthread_create(&threads[i], NULL, thread_func, NULL); if (errno) err(EXIT_FAILURE, "pthread_create failed"); } // Write #cpus notifications for (i = 0; i < cpus; i++) { res = write(notify_fd, &n, sizeof(n)); if (res < 0) err(EXIT_FAILURE, "write failed"); assert(res == sizeof(n)); } // Await that all notifications were received for (i = 0; i < cpus; i++) 
sem_wait(&sem); // Exit without resource cleanup exit(EXIT_SUCCESS); } liburing-2.6/test/fadvise.c000066400000000000000000000072461461424365000157560ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: basic fadvise test */ #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE (128 * 1024) #define LOOPS 100 #define MIN_LOOPS 10 static unsigned long long utime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000000; return sec + usec; } static unsigned long long utime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return utime_since(tv, &end); } static int do_fadvise(struct io_uring *ring, int fd, off_t offset, off_t len, int advice) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "failed to get sqe\n"); return 1; } io_uring_prep_fadvise(sqe, fd, offset, len, advice); sqe->user_data = advice; ret = io_uring_submit_and_wait(ring, 1); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return 1; } ret = cqe->res; if (ret == -EINVAL || ret == -EBADF) { fprintf(stdout, "Fadvise not supported, skipping\n"); unlink(".fadvise.tmp"); exit(T_EXIT_SKIP); } else if (ret) { fprintf(stderr, "cqe->res=%d\n", cqe->res); } io_uring_cqe_seen(ring, cqe); return ret; } static long do_read(int fd, char *buf) { struct timeval tv; int ret; long t; ret = lseek(fd, 0, SEEK_SET); if (ret) { perror("lseek"); return -1; } gettimeofday(&tv, NULL); ret = read(fd, buf, FILE_SIZE); t = utime_since_now(&tv); if (ret < 0) { perror("read"); return -1; } else if (ret != FILE_SIZE) { fprintf(stderr, "short read1: %d\n", ret); return -1; } return t; } static int test_fadvise(struct io_uring *ring, const char *filename) { unsigned long cached_read, uncached_read, cached_read2; int fd, ret; char *buf; fd = open(filename, O_RDONLY); if (fd < 0) { perror("open"); return 1; } buf = t_malloc(FILE_SIZE); cached_read = do_read(fd, buf); if (cached_read == -1) return 1; ret = do_fadvise(ring, fd, 0, FILE_SIZE, POSIX_FADV_DONTNEED); if (ret) return 1; uncached_read = do_read(fd, buf); if (uncached_read == -1) return 1; ret = do_fadvise(ring, fd, 0, FILE_SIZE, POSIX_FADV_DONTNEED); if (ret) return 1; ret = do_fadvise(ring, fd, 0, FILE_SIZE, POSIX_FADV_WILLNEED); if (ret) return 1; fsync(fd); cached_read2 = do_read(fd, buf); if (cached_read2 == -1) return 1; if (cached_read < uncached_read && cached_read2 < uncached_read) return 0; return 2; } int main(int argc, char *argv[]) { struct io_uring ring; int ret, i, good, bad; char *fname; if (argc > 1) { fname = argv[1]; } else { fname = ".fadvise.tmp"; t_create_file(fname, FILE_SIZE); } if (io_uring_queue_init(8, &ring, 0)) { fprintf(stderr, "ring creation failed\n"); goto err; } good = bad = 0; for (i = 0; i < LOOPS; i++) { ret = test_fadvise(&ring, fname); if (ret == 1) { fprintf(stderr, "read_fadvise failed\n"); goto err; } else if (!ret) good++; else if (ret == 2) bad++; if (i >= MIN_LOOPS && !bad) break; } /* too hard to reliably test, just ignore */ if ((0) && bad > good) { fprintf(stderr, "Suspicious timings\n"); goto err; } if (fname != argv[1]) unlink(fname); io_uring_queue_exit(&ring); return T_EXIT_PASS; err: if (fname != 
argv[1]) unlink(fname); return T_EXIT_FAIL; } liburing-2.6/test/fallocate.c000066400000000000000000000112401461424365000162540ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring fallocate * */ #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_fallocate; static int test_fallocate_rlimit(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct rlimit rlim; char buf[32]; int fd, ret; if (getrlimit(RLIMIT_FSIZE, &rlim) < 0) { perror("getrlimit"); return 1; } rlim.rlim_cur = 64 * 1024; rlim.rlim_max = 64 * 1024; if (setrlimit(RLIMIT_FSIZE, &rlim) < 0) { perror("setrlimit"); return 1; } sprintf(buf, "./XXXXXX"); fd = mkstemp(buf); if (fd < 0) { perror("open"); return 1; } unlink(buf); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_fallocate(sqe, fd, 0, 0, 128*1024); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res == -EINVAL) { fprintf(stdout, "Fallocate not supported, skipping\n"); no_fallocate = 1; goto skip; } else if (cqe->res != -EFBIG) { fprintf(stderr, "Expected -EFBIG: %d\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); return 0; skip: return T_EXIT_SKIP; err: return 1; } static int test_fallocate(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct stat st; char buf[32]; int fd, ret; sprintf(buf, "./XXXXXX"); fd = mkstemp(buf); if (fd < 0) { perror("open"); return 1; } unlink(buf); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_fallocate(sqe, fd, 0, 0, 128*1024); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res == -EINVAL) { fprintf(stdout, "Fallocate not supported, skipping\n"); no_fallocate = 1; goto skip; } if (cqe->res) { fprintf(stderr, "cqe->res=%d\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); if (fstat(fd, &st) < 0) { perror("stat"); goto err; } if (st.st_size != 128*1024) { fprintf(stderr, "Size mismatch: %llu\n", (unsigned long long) st.st_size); goto err; } return 0; skip: return T_EXIT_SKIP; err: return 1; } static int test_fallocate_fsync(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct stat st; char buf[32]; int fd, ret, i; if (no_fallocate) return 0; sprintf(buf, "./XXXXXX"); fd = mkstemp(buf); if (fd < 0) { perror("open"); return 1; } unlink(buf); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_fallocate(sqe, fd, 0, 0, 128*1024); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_fsync(sqe, fd, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res) { fprintf(stderr, "cqe->res=%d,data=%" PRIu64 "\n", cqe->res, (uint64_t) cqe->user_data); goto err; } io_uring_cqe_seen(ring, cqe); } 
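	/* both linked requests (fallocate, then fsync) completed with res 0;
	 * the 128k allocation must now be visible via fstat */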
if (fstat(fd, &st) < 0) { perror("stat"); goto err; } if (st.st_size != 128*1024) { fprintf(stderr, "Size mismatch: %llu\n", (unsigned long long) st.st_size); goto err; } return 0; err: return 1; } static void sig_xfsz(int sig) { } int main(int argc, char *argv[]) { struct sigaction act = { }; struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; act.sa_handler = sig_xfsz; sigaction(SIGXFSZ, &act, NULL); ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } ret = test_fallocate(&ring); if (ret) { if (ret != T_EXIT_SKIP) { fprintf(stderr, "test_fallocate failed\n"); } return ret; } ret = test_fallocate_fsync(&ring); if (ret) { fprintf(stderr, "test_fallocate_fsync failed\n"); return ret; } ret = test_fallocate_rlimit(&ring); if (ret) { if (ret != T_EXIT_SKIP) { fprintf(stderr, "test_fallocate_rlimit failed\n"); } return ret; } return T_EXIT_PASS; } liburing-2.6/test/fc2a85cb02ef.c000066400000000000000000000067361461424365000163120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ // https://syzkaller.appspot.com/bug?id=1f2ecd7a23dba87e5ca3505ec44514a462cfe8c0 // autogenerated by syzkaller (https://github.com/google/syzkaller) #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" static bool write_file(const char* file, const char* what, ...) { char buf[1024]; va_list args; va_start(args, what); vsnprintf(buf, sizeof(buf), what, args); va_end(args); buf[sizeof(buf) - 1] = 0; int len = strlen(buf); int fd = open(file, O_WRONLY | O_CLOEXEC); if (fd == -1) return false; if (write(fd, buf, len) != len) { int err = errno; close(fd); errno = err; return false; } close(fd); return true; } static int inject_fault(int nth) { int fd; fd = open("/proc/thread-self/fail-nth", O_RDWR); if (fd == -1) exit(1); char buf[16]; sprintf(buf, "%d", nth + 1); if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) exit(1); return fd; } static int setup_fault(void) { static struct { const char* file; const char* val; bool fatal; } files[] = { {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true}, {"/sys/kernel/debug/failslab/verbose", "0", false}, {"/sys/kernel/debug/fail_futex/ignore-private", "N", false}, {"/sys/kernel/debug/fail_page_alloc/verbose", "0", false}, {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false}, {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false}, {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false}, }; unsigned i; for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) { if (!write_file(files[i].file, files[i].val)) { if (files[i].fatal) return 1; } } return 0; } static uint64_t r[2] = {0xffffffffffffffff, 0xffffffffffffffff}; int main(int argc, char *argv[]) { if (argc > 1) return T_EXIT_SKIP; mmap((void *) 0x20000000ul, 0x1000000ul, 3ul, MAP_ANON|MAP_PRIVATE, -1, 0); if (setup_fault()) { printf("Test needs failslab/fail_futex/fail_page_alloc enabled, skipped\n"); return T_EXIT_SKIP; } intptr_t res = 0; *(uint32_t*)0x20000000 = 0; *(uint32_t*)0x20000004 = 0; *(uint32_t*)0x20000008 = 0; *(uint32_t*)0x2000000c = 0; *(uint32_t*)0x20000010 = 0; *(uint32_t*)0x20000014 = 0; *(uint32_t*)0x20000018 = 0; *(uint32_t*)0x2000001c = 0; *(uint32_t*)0x20000020 = 0; *(uint32_t*)0x20000024 = 0; *(uint32_t*)0x20000028 = 0; *(uint32_t*)0x2000002c = 0; *(uint32_t*)0x20000030 = 0; *(uint32_t*)0x20000034 = 0; *(uint32_t*)0x20000038 = 0; *(uint32_t*)0x2000003c = 0; 
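  /* the stores at 0x20000000-0x20000077 zero a struct io_uring_params
   * (120 bytes) for the io_uring_setup() call below; fault injection is
   * armed just before the socket fd is registered as a fixed file
   * (opcode 2 == IORING_REGISTER_FILES) */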
*(uint32_t*)0x20000040 = 0; *(uint32_t*)0x20000044 = 0; *(uint64_t*)0x20000048 = 0; *(uint32_t*)0x20000050 = 0; *(uint32_t*)0x20000054 = 0; *(uint32_t*)0x20000058 = 0; *(uint32_t*)0x2000005c = 0; *(uint32_t*)0x20000060 = 0; *(uint32_t*)0x20000064 = 0; *(uint32_t*)0x20000068 = 0; *(uint32_t*)0x2000006c = 0; *(uint64_t*)0x20000070 = 0; res = __sys_io_uring_setup(0x6a6, (struct io_uring_params *) 0x20000000ul); if (res != -1) r[0] = res; res = socket(0x11ul, 2ul, 0x300ul); if (res != -1) r[1] = res; *(uint32_t*)0x20000080 = r[1]; inject_fault(1); __sys_io_uring_register(r[0], 2ul, (const void *) 0x20000080ul, 1ul); return T_EXIT_PASS; } liburing-2.6/test/fd-install.c000066400000000000000000000252041461424365000163640ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test installing a direct descriptor into the regular * file table * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_fd_install; /* test that O_CLOEXEC is accepted, and others are not */ static int test_flags(struct io_uring *ring, int async) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, fds[2], fd; if (pipe(fds) < 0) { perror("pipe"); return T_EXIT_FAIL; } ret = io_uring_register_files(ring, &fds[0], 1); if (ret) { fprintf(stderr, "failed register files %d\n", ret); return T_EXIT_FAIL; } /* check that setting an invalid flag fails */ sqe = io_uring_get_sqe(ring); io_uring_prep_fixed_fd_install(sqe, 0, 1U << 17); io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->res != -EINVAL) { fprintf(stderr, "unexpected cqe res %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); /* check that IORING_FIXED_FD_NO_CLOEXEC is accepted */ sqe = io_uring_get_sqe(ring); io_uring_prep_fixed_fd_install(sqe, 0, IORING_FIXED_FD_NO_CLOEXEC); if (async) sqe->flags |= IOSQE_ASYNC; io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->res < 0) { fprintf(stderr, "unexpected cqe res %d\n", cqe->res); return T_EXIT_FAIL; } fd = cqe->res; io_uring_cqe_seen(ring, cqe); close(fds[0]); close(fds[1]); close(fd); io_uring_unregister_files(ring); return T_EXIT_PASS; } static int test_linked(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, fds[2], fd, i; if (pipe(fds) < 0) { perror("pipe"); return T_EXIT_FAIL; } ret = io_uring_register_files(ring, &fds[0], 1); if (ret) { fprintf(stderr, "failed register files %d\n", ret); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); io_uring_prep_fixed_fd_install(sqe, 0, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret != 2) { fprintf(stderr, "submit: %d\n", ret); return T_EXIT_FAIL; } fd = -1; for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->res < 0) { fprintf(stderr, "unexpected cqe res %d\n", cqe->res); return T_EXIT_FAIL; } if (cqe->user_data == 2) fd = cqe->res; io_uring_cqe_seen(ring, cqe); } close(fds[0]); close(fds[1]); if (fd != -1) close(fd); io_uring_unregister_files(ring); return T_EXIT_PASS; } /* test not setting IOSQE_FIXED_FILE */ static int test_not_fixed(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, fds[2]; if (pipe(fds) < 0) { 
perror("pipe"); return T_EXIT_FAIL; } ret = io_uring_register_files(ring, &fds[0], 1); if (ret) { fprintf(stderr, "failed register files %d\n", ret); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(ring); io_uring_prep_fixed_fd_install(sqe, 0, 0); sqe->flags &= ~IOSQE_FIXED_FILE; io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->res != -EBADF) { fprintf(stderr, "unexpected cqe res %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); close(fds[0]); close(fds[1]); io_uring_unregister_files(ring); return T_EXIT_PASS; } /* test invalid direct descriptor indexes */ static int test_bad_fd(struct io_uring *ring, int some_fd) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(ring); io_uring_prep_fixed_fd_install(sqe, some_fd, 0); io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->res != -EBADF) { fprintf(stderr, "unexpected cqe res %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); return T_EXIT_PASS; } /* test basic functionality of shifting a direct descriptor to a normal file */ static int test_working(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, fds[2]; char buf[32]; if (pipe(fds) < 0) { perror("pipe"); return T_EXIT_FAIL; } /* register read side */ ret = io_uring_register_files(ring, &fds[0], 1); if (ret) { fprintf(stderr, "failed register files %d\n", ret); return T_EXIT_FAIL; } /* close normal descriptor */ close(fds[0]); /* normal read should fail */ ret = read(fds[0], buf, 1); if (ret != -1) { fprintf(stderr, "unexpected read ret %d\n", ret); return T_EXIT_FAIL; } if (errno != EBADF) { fprintf(stderr, "unexpected read failure %d\n", errno); return T_EXIT_FAIL; } /* verify we can read the data */ sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, 0, buf, sizeof(buf), 0); sqe->flags |= IOSQE_FIXED_FILE; io_uring_submit(ring); /* put some data in the pipe */ ret = write(fds[1], "Hello", 5); if (ret < 0) { perror("write"); return T_EXIT_FAIL; } else if (ret != 5) { fprintf(stderr, "short write %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->res != 5) { fprintf(stderr, "weird pipe read ret %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); /* fixed pipe read worked, now re-install as a regular fd */ sqe = io_uring_get_sqe(ring); io_uring_prep_fixed_fd_install(sqe, 0, 0); io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->res == -EINVAL) { no_fd_install = 1; return T_EXIT_SKIP; } if (cqe->res < 0) { fprintf(stderr, "failed install fd: %d\n", cqe->res); return T_EXIT_FAIL; } /* stash new pipe read side fd in old spot */ fds[0] = cqe->res; io_uring_cqe_seen(ring, cqe); ret = write(fds[1], "Hello", 5); if (ret < 0) { perror("write"); return T_EXIT_FAIL; } else if (ret != 5) { fprintf(stderr, "short write %d\n", ret); return T_EXIT_FAIL; } /* normal pipe read should now work with new fd */ ret = read(fds[0], buf, sizeof(buf)); if (ret != 5) { fprintf(stderr, "unexpected read ret %d\n", ret); return T_EXIT_FAIL; } /* close fixed file */ sqe = io_uring_get_sqe(ring); io_uring_prep_close_direct(sqe, 0); io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe 
%d\n", ret); return T_EXIT_FAIL; } if (cqe->res) { fprintf(stderr, "close fixed fd %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); ret = write(fds[1], "Hello", 5); if (ret < 0) { perror("write"); return T_EXIT_FAIL; } else if (ret != 5) { fprintf(stderr, "short write %d\n", ret); return T_EXIT_FAIL; } /* normal pipe read should still work with new fd */ ret = read(fds[0], buf, sizeof(buf)); if (ret != 5) { fprintf(stderr, "unexpected read ret %d\n", ret); return T_EXIT_FAIL; } /* fixed fd pipe read should now fail */ sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, 0, buf, sizeof(buf), 0); sqe->flags = IOSQE_FIXED_FILE; io_uring_submit(ring); /* put some data in the pipe */ ret = write(fds[1], "Hello", 5); if (ret < 0) { perror("write"); return T_EXIT_FAIL; } else if (ret != 5) { fprintf(stderr, "short write %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->res != -EBADF) { fprintf(stderr, "weird pipe read ret %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); close(fds[0]); close(fds[1]); io_uring_unregister_files(ring); return T_EXIT_PASS; } static int test_creds(struct io_uring *ring, int async) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int cred_id, ret, fds[2]; if (pipe(fds) < 0) { perror("pipe"); return T_EXIT_FAIL; } ret = io_uring_register_files(ring, &fds[0], 1); if (ret) { fprintf(stderr, "failed register files %d\n", ret); return T_EXIT_FAIL; } cred_id = io_uring_register_personality(ring); if (cred_id < 0) { fprintf(stderr, "Failed registering creds: %d\n", cred_id); return T_EXIT_FAIL; } /* check that asking for creds fails */ sqe = io_uring_get_sqe(ring); io_uring_prep_fixed_fd_install(sqe, 0, 0); if (async) sqe->flags |= IOSQE_ASYNC; sqe->personality = cred_id; io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->res > 0) { fprintf(stderr, "install succeeded with creds\n"); return T_EXIT_FAIL; } if (cqe->res != -EPERM) { fprintf(stderr, "unexpected cqe res %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); close(fds[0]); close(fds[1]); io_uring_unregister_files(ring); io_uring_unregister_personality(ring, cred_id); return T_EXIT_PASS; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(4, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = test_working(&ring); if (ret != T_EXIT_PASS) { if (ret == T_EXIT_FAIL) fprintf(stderr, "test_working failed\n"); return ret; } if (no_fd_install) return T_EXIT_SKIP; ret = test_bad_fd(&ring, 0); if (ret != T_EXIT_PASS) { if (ret == T_EXIT_FAIL) fprintf(stderr, "test_bad_fd 0 failed\n"); return ret; } ret = test_bad_fd(&ring, 500); if (ret != T_EXIT_PASS) { if (ret == T_EXIT_FAIL) fprintf(stderr, "test_bad_fd 500 failed\n"); return ret; } ret = test_not_fixed(&ring); if (ret != T_EXIT_PASS) { if (ret == T_EXIT_FAIL) fprintf(stderr, "test_not_fixed failed\n"); return ret; } ret = test_flags(&ring, 0); if (ret != T_EXIT_PASS) { if (ret == T_EXIT_FAIL) fprintf(stderr, "test_flags 0 failed\n"); return ret; } ret = test_flags(&ring, 1); if (ret != T_EXIT_PASS) { if (ret == T_EXIT_FAIL) fprintf(stderr, "test_flags 1 failed\n"); return ret; } ret = test_creds(&ring, 0); if (ret != T_EXIT_PASS) { if (ret == T_EXIT_FAIL) fprintf(stderr, "test_creds 0 failed\n"); 
return ret; } ret = test_creds(&ring, 1); if (ret != T_EXIT_PASS) { if (ret == T_EXIT_FAIL) fprintf(stderr, "test_creds 1 failed\n"); return ret; } ret = test_linked(&ring); if (ret != T_EXIT_PASS) { if (ret == T_EXIT_FAIL) fprintf(stderr, "test_linked failed\n"); return ret; } return T_EXIT_PASS; } liburing-2.6/test/fd-pass.c000066400000000000000000000126431461424365000156670ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various fixed file fd passing tests * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define FSIZE 128 #define PAT 0x9a #define USER_DATA 0x89 static int no_fd_pass; static int verify_fixed_read(struct io_uring *ring, int fixed_fd, int fail) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; unsigned char buf[FSIZE]; int i; sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, fixed_fd, buf, FSIZE, 0); sqe->flags |= IOSQE_FIXED_FILE; io_uring_submit(ring); io_uring_wait_cqe(ring, &cqe); if (cqe->res != FSIZE) { if (fail && cqe->res == -EBADF) return 0; fprintf(stderr, "Read: %d\n", cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); for (i = 0; i < FSIZE; i++) { if (buf[i] != PAT) { fprintf(stderr, "got %x, wanted %x\n", buf[i], PAT); return 1; } } return 0; } static int test(const char *filename, int source_fd, int target_fd) { struct io_uring sring, dring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; ret = io_uring_queue_init(8, &sring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_queue_init(8, &dring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_register_files_sparse(&sring, 8); if (ret) { if (ret == -EINVAL) return T_EXIT_SKIP; fprintf(stderr, "register files failed %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_register_files_sparse(&dring, 8); if (ret) { fprintf(stderr, "register files failed %d\n", ret); return T_EXIT_FAIL; } if (target_fd == IORING_FILE_INDEX_ALLOC) { /* we want to test installing into a non-zero slot */ ret = io_uring_register_file_alloc_range(&dring, 1, 1); if (ret) { fprintf(stderr, "io_uring_register_file_alloc_range %d\n", ret); return T_EXIT_FAIL; } } /* open direct descriptor */ sqe = io_uring_get_sqe(&sring); io_uring_prep_openat_direct(sqe, AT_FDCWD, filename, 0, 0644, source_fd); io_uring_submit(&sring); ret = io_uring_wait_cqe(&sring, &cqe); if (ret) { fprintf(stderr, "wait cqe failed %d\n", ret); return T_EXIT_FAIL; } if (cqe->res) { fprintf(stderr, "cqe res %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(&sring, cqe); /* verify data is sane for source ring */ if (verify_fixed_read(&sring, source_fd, 0)) return T_EXIT_FAIL; /* send direct descriptor to destination ring */ sqe = io_uring_get_sqe(&sring); if (target_fd == IORING_FILE_INDEX_ALLOC) { io_uring_prep_msg_ring_fd_alloc(sqe, dring.ring_fd, source_fd, USER_DATA, 0); } else { io_uring_prep_msg_ring_fd(sqe, dring.ring_fd, source_fd, target_fd, USER_DATA, 0); } io_uring_submit(&sring); ret = io_uring_wait_cqe(&sring, &cqe); if (ret) { fprintf(stderr, "wait cqe failed %d\n", ret); return T_EXIT_FAIL; } if (cqe->res < 0) { if (cqe->res == -EINVAL && !no_fd_pass) { no_fd_pass = 1; return T_EXIT_SKIP; } fprintf(stderr, "msg_ring failed %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(&sring, cqe); /* get posted completion for the passing */ ret = io_uring_wait_cqe(&dring, &cqe); if (ret) { fprintf(stderr, "wait cqe failed %d\n", ret); return 
T_EXIT_FAIL; } if (cqe->user_data != USER_DATA) { fprintf(stderr, "bad user_data %ld\n", (long) cqe->res); return T_EXIT_FAIL; } if (cqe->res < 0) { fprintf(stderr, "bad result %i\n", cqe->res); return T_EXIT_FAIL; } if (target_fd == IORING_FILE_INDEX_ALLOC) { if (cqe->res != 1) { fprintf(stderr, "invalid allocated index %i\n", cqe->res); return T_EXIT_FAIL; } target_fd = cqe->res; } io_uring_cqe_seen(&dring, cqe); /* now verify we can read the sane data from the destination ring */ if (verify_fixed_read(&dring, target_fd, 0)) return T_EXIT_FAIL; /* close descriptor in source ring */ sqe = io_uring_get_sqe(&sring); io_uring_prep_close_direct(sqe, source_fd); io_uring_submit(&sring); ret = io_uring_wait_cqe(&sring, &cqe); if (ret) { fprintf(stderr, "wait cqe failed %d\n", ret); return T_EXIT_FAIL; } if (cqe->res) { fprintf(stderr, "direct close failed %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(&sring, cqe); /* check that source ring fails after close */ if (verify_fixed_read(&sring, source_fd, 1)) return T_EXIT_FAIL; /* check we can still read from destination ring */ if (verify_fixed_read(&dring, target_fd, 0)) return T_EXIT_FAIL; io_uring_queue_exit(&sring); io_uring_queue_exit(&dring); return T_EXIT_PASS; } int main(int argc, char *argv[]) { char fname[80]; int ret; if (argc > 1) return T_EXIT_SKIP; sprintf(fname, ".fd-pass.%d", getpid()); t_create_file_pattern(fname, FSIZE, PAT); ret = test(fname, 0, 1); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test failed 0 1\n"); ret = T_EXIT_FAIL; } ret = test(fname, 0, 2); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test failed 0 2\n"); ret = T_EXIT_FAIL; } ret = test(fname, 1, 1); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test failed 1 1\n"); ret = T_EXIT_FAIL; } ret = test(fname, 1, 0); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test failed 1 0\n"); ret = T_EXIT_FAIL; } ret = test(fname, 1, IORING_FILE_INDEX_ALLOC); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test failed 1 ALLOC\n"); ret = T_EXIT_FAIL; } unlink(fname); return ret; } liburing-2.6/test/file-register.c000066400000000000000000000571211461424365000170730ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various file registration tests * */ #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static int no_update = 0; static void close_files(int *files, int nr_files, int add) { char fname[32]; int i; for (i = 0; i < nr_files; i++) { if (files) close(files[i]); if (!add) sprintf(fname, ".reg.%d", i); else sprintf(fname, ".add.%d", i + add); unlink(fname); } if (files) free(files); } static int *open_files(int nr_files, int extra, int add) { char fname[32]; int *files; int i; files = t_calloc(nr_files + extra, sizeof(int)); for (i = 0; i < nr_files; i++) { if (!add) sprintf(fname, ".reg.%d", i); else sprintf(fname, ".add.%d", i + add); files[i] = open(fname, O_RDWR | O_CREAT, 0644); if (files[i] < 0) { perror("open"); free(files); files = NULL; break; } } if (extra) { for (i = nr_files; i < nr_files + extra; i++) files[i] = -1; } return files; } static int test_shrink(struct io_uring *ring) { int ret, off, fd; int *files; files = open_files(50, 0, 0); ret = io_uring_register_files(ring, files, 50); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } off = 0; do { fd = -1; ret = io_uring_register_files_update(ring, off, &fd, 1); if (ret != 1) { if (off == 50 && ret == -EINVAL) break; fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); 
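/* a failure before running off the end of the table (off == 50) is a real error */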
break; } off++; } while (1); ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } close_files(files, 50, 0); return 0; err: close_files(files, 50, 0); return 1; } static int test_grow(struct io_uring *ring) { int ret, off; int *files, *fds = NULL; files = open_files(50, 250, 0); ret = io_uring_register_files(ring, files, 300); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } off = 50; do { fds = open_files(1, 0, off); ret = io_uring_register_files_update(ring, off, fds, 1); if (ret != 1) { if (off == 300 && ret == -EINVAL) break; fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); break; } if (off >= 300) { fprintf(stderr, "%s: Succeeded beyond end-of-list?\n", __FUNCTION__); goto err; } off++; } while (1); ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } close_files(files, 100, 0); close_files(NULL, 251, 50); return 0; err: close_files(files, 100, 0); close_files(NULL, 251, 50); return 1; } static int test_replace_all(struct io_uring *ring) { int *files, *fds = NULL; int ret, i; files = open_files(100, 0, 0); ret = io_uring_register_files(ring, files, 100); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } fds = t_malloc(100 * sizeof(int)); for (i = 0; i < 100; i++) fds[i] = -1; ret = io_uring_register_files_update(ring, 0, fds, 100); if (ret != 100) { fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); goto err; } ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } close_files(files, 100, 0); if (fds) free(fds); return 0; err: close_files(files, 100, 0); if (fds) free(fds); return 1; } static int test_replace(struct io_uring *ring) { int *files, *fds = NULL; int ret; files = open_files(100, 0, 0); ret = io_uring_register_files(ring, files, 100); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } fds = open_files(10, 0, 1); ret = io_uring_register_files_update(ring, 90, fds, 10); if (ret != 10) { fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); goto err; } ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } close_files(files, 100, 0); if (fds) close_files(fds, 10, 1); return 0; err: close_files(files, 100, 0); if (fds) close_files(fds, 10, 1); return 1; } static int test_removals(struct io_uring *ring) { int *files, *fds = NULL; int ret, i; files = open_files(100, 0, 0); ret = io_uring_register_files(ring, files, 100); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } fds = t_calloc(10, sizeof(int)); for (i = 0; i < 10; i++) fds[i] = -1; ret = io_uring_register_files_update(ring, 50, fds, 10); if (ret != 10) { fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); goto err; } ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } close_files(files, 100, 0); if (fds) free(fds); return 0; err: close_files(files, 100, 0); if (fds) free(fds); return 1; } static int test_additions(struct io_uring *ring) { int *files, *fds = NULL; int ret; files = open_files(100, 100, 0); ret = io_uring_register_files(ring, files, 200); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } fds = open_files(2, 0, 1); ret = io_uring_register_files_update(ring, 100, 
fds, 2); if (ret != 2) { fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); goto err; } ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } close_files(files, 100, 0); if (fds) close_files(fds, 2, 1); return 0; err: close_files(files, 100, 0); if (fds) close_files(fds, 2, 1); return 1; } static int test_sparse(struct io_uring *ring) { int *files; int ret; files = open_files(100, 100, 0); ret = io_uring_register_files(ring, files, 200); if (ret) { if (ret == -EBADF || ret == -EINVAL) { fprintf(stdout, "Sparse files not supported, skipping\n"); no_update = 1; goto done; } fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } done: close_files(files, 100, 0); return 0; err: close_files(files, 100, 0); return 1; } static int test_basic_many(struct io_uring *ring) { int *files; int ret; files = open_files(768, 0, 0); ret = io_uring_register_files(ring, files, 768); if (ret) { fprintf(stderr, "%s: register %d\n", __FUNCTION__, ret); goto err; } ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister %d\n", __FUNCTION__, ret); goto err; } close_files(files, 768, 0); return 0; err: close_files(files, 768, 0); return 1; } static int test_basic(struct io_uring *ring, int fail) { int *files; int ret, i; int nr_files = fail ? 10 : 100; files = open_files(nr_files, fail ? 90 : 0, 0); if (fail) { for (i = nr_files; i < nr_files + 90; i++) files[i] = -2; } ret = io_uring_register_files(ring, files, 100); if (ret) { if (fail) { if (ret == -EBADF || ret == -EFAULT) return 0; } fprintf(stderr, "%s: register %d\n", __FUNCTION__, ret); goto err; } if (fail) { fprintf(stderr, "Registration succeeded, but expected fail\n"); goto err; } ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister %d\n", __FUNCTION__, ret); goto err; } close_files(files, nr_files, 0); return 0; err: close_files(files, nr_files, 0); return 1; } /* * Register 0 files, but reserve space for 10. Then add one file. 
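 * The initial set is fully sparse (every slot -1), so this checks that
 * a later single-slot update into reserved but empty space succeeds.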
*/ static int test_zero(struct io_uring *ring) { int *files, *fds = NULL; int ret; files = open_files(0, 10, 0); ret = io_uring_register_files(ring, files, 10); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } fds = open_files(1, 0, 1); ret = io_uring_register_files_update(ring, 0, fds, 1); if (ret != 1) { fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); goto err; } ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } if (fds) close_files(fds, 1, 1); free(files); return 0; err: if (fds) close_files(fds, 1, 1); free(files); return 1; } static int test_fixed_read_write(struct io_uring *ring, int index) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec iov[2]; int ret; iov[0].iov_base = t_malloc(4096); iov[0].iov_len = 4096; memset(iov[0].iov_base, 0x5a, 4096); iov[1].iov_base = t_malloc(4096); iov[1].iov_len = 4096; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); return 1; } io_uring_prep_writev(sqe, index, &iov[0], 1, 0); sqe->flags |= IOSQE_FIXED_FILE; sqe->user_data = 1; ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: io_uring_wait_cqe=%d\n", __FUNCTION__, ret); return 1; } if (cqe->res != 4096) { fprintf(stderr, "%s: write cqe->res=%d\n", __FUNCTION__, cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); return 1; } io_uring_prep_readv(sqe, index, &iov[1], 1, 0); sqe->flags |= IOSQE_FIXED_FILE; sqe->user_data = 2; ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: io_uring_wait_cqe=%d\n", __FUNCTION__, ret); return 1; } if (cqe->res != 4096) { fprintf(stderr, "%s: read cqe->res=%d\n", __FUNCTION__, cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); if (memcmp(iov[1].iov_base, iov[0].iov_base, 4096)) { fprintf(stderr, "%s: data mismatch\n", __FUNCTION__); return 1; } free(iov[0].iov_base); free(iov[1].iov_base); return 0; } static void adjust_nfiles(int want_files) { struct rlimit rlim; if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) return; if (rlim.rlim_cur >= want_files) return; rlim.rlim_cur = want_files; setrlimit(RLIMIT_NOFILE, &rlim); } /* * Register 8K of sparse files, update one at a random spot, then do some * file IO to verify it works. 
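 * adjust_nfiles() raises RLIMIT_NOFILE first so the 8192-slot table is
 * not rejected for accounting reasons; -EMFILE from registration is
 * taken to mean huge sets are unsupported and the test is skipped.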
*/ static int test_huge(struct io_uring *ring) { int *files; int ret; adjust_nfiles(16384); files = open_files(0, 8192, 0); ret = io_uring_register_files(ring, files, 8192); if (ret) { /* huge sets not supported */ if (ret == -EMFILE) { fprintf(stdout, "%s: No huge file set support, skipping\n", __FUNCTION__); goto out; } fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } files[7193] = open(".reg.7193", O_RDWR | O_CREAT, 0644); if (files[7193] < 0) { fprintf(stderr, "%s: open=%d\n", __FUNCTION__, errno); goto err; } ret = io_uring_register_files_update(ring, 7193, &files[7193], 1); if (ret != 1) { fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); goto err; } if (test_fixed_read_write(ring, 7193)) goto err; ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } if (files[7193] != -1) { close(files[7193]); unlink(".reg.7193"); } out: free(files); return 0; err: if (files[7193] != -1) { close(files[7193]); unlink(".reg.7193"); } free(files); return 1; } static int test_skip(struct io_uring *ring) { int *files; int ret; files = open_files(100, 0, 0); ret = io_uring_register_files(ring, files, 100); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); goto err; } files[90] = IORING_REGISTER_FILES_SKIP; ret = io_uring_register_files_update(ring, 90, &files[90], 1); if (ret != 1) { if (ret == -EBADF) { fprintf(stdout, "Skipping files not supported\n"); goto done; } fprintf(stderr, "%s: update ret=%d\n", __FUNCTION__, ret); goto err; } /* verify can still use file index 90 */ if (test_fixed_read_write(ring, 90)) goto err; ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "%s: unregister ret=%d\n", __FUNCTION__, ret); goto err; } done: close_files(files, 100, 0); return 0; err: close_files(files, 100, 0); return 1; } static int test_sparse_updates(void) { struct io_uring ring; int ret, i, *fds, newfd; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "queue_init: %d\n", ret); return ret; } fds = t_malloc(256 * sizeof(int)); for (i = 0; i < 256; i++) fds[i] = -1; ret = io_uring_register_files(&ring, fds, 256); if (ret) { fprintf(stderr, "file_register: %d\n", ret); return ret; } newfd = 1; for (i = 0; i < 256; i++) { ret = io_uring_register_files_update(&ring, i, &newfd, 1); if (ret != 1) { fprintf(stderr, "file_update: %d\n", ret); return ret; } } io_uring_unregister_files(&ring); for (i = 0; i < 256; i++) fds[i] = 1; ret = io_uring_register_files(&ring, fds, 256); if (ret) { fprintf(stderr, "file_register: %d\n", ret); return ret; } newfd = -1; for (i = 0; i < 256; i++) { ret = io_uring_register_files_update(&ring, i, &newfd, 1); if (ret != 1) { fprintf(stderr, "file_update: %d\n", ret); return ret; } } io_uring_unregister_files(&ring); io_uring_queue_exit(&ring); return 0; } static int test_fixed_removal_ordering(void) { char buffer[128]; struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts; int ret, fd, i, fds[2]; ret = io_uring_queue_init(8, &ring, 0); if (ret < 0) { fprintf(stderr, "failed to init io_uring: %s\n", strerror(-ret)); return ret; } if (pipe(fds)) { perror("pipe"); return -1; } ret = io_uring_register_files(&ring, fds, 2); if (ret) { fprintf(stderr, "file_register: %d\n", ret); return ret; } /* ring should have fds referenced, can close them */ close(fds[0]); close(fds[1]); sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); return 1; } 
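/*
 * Arm a 3 second timeout linked ahead of a fixed-file write, then pull
 * both registered slots out from under the pair while it is still in
 * flight; the pending write must hold its own file reference across
 * the removal.
 */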
/* outwait file recycling delay */ ts.tv_sec = 3; ts.tv_nsec = 0; io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->flags |= IOSQE_IO_LINK | IOSQE_IO_HARDLINK; sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); if (!sqe) { printf("get sqe failed\n"); return -1; } io_uring_prep_write(sqe, 1, buffer, sizeof(buffer), 0); sqe->flags |= IOSQE_FIXED_FILE; sqe->user_data = 2; ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "%s: got %d, wanted 2\n", __FUNCTION__, ret); return -1; } /* remove unused pipe end */ fd = -1; ret = io_uring_register_files_update(&ring, 0, &fd, 1); if (ret != 1) { fprintf(stderr, "update off=0 failed\n"); return -1; } /* remove used pipe end */ fd = -1; ret = io_uring_register_files_update(&ring, 1, &fd, 1); if (ret != 1) { fprintf(stderr, "update off=1 failed\n"); return -1; } for (i = 0; i < 2; ++i) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: io_uring_wait_cqe=%d\n", __FUNCTION__, ret); return 1; } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); return 0; } /* mix files requiring SCM-accounting and not in a single register */ static int test_mixed_af_unix(void) { struct io_uring ring; int i, ret, fds[2]; int reg_fds[32]; int sp[2]; ret = io_uring_queue_init(8, &ring, 0); if (ret < 0) { fprintf(stderr, "failed to init io_uring: %s\n", strerror(-ret)); return ret; } if (pipe(fds)) { perror("pipe"); return -1; } if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sp) != 0) { perror("Failed to create Unix-domain socket pair\n"); return 1; } for (i = 0; i < 16; i++) { reg_fds[i * 2] = fds[0]; reg_fds[i * 2 + 1] = sp[0]; } ret = io_uring_register_files(&ring, reg_fds, 32); if (ret) { fprintf(stderr, "file_register: %d\n", ret); return ret; } close(fds[0]); close(fds[1]); close(sp[0]); close(sp[1]); io_uring_queue_exit(&ring); return 0; } static int test_partial_register_fail(void) { char buffer[128]; struct io_uring ring; int ret, fds[2]; int reg_fds[5]; ret = io_uring_queue_init(8, &ring, 0); if (ret < 0) { fprintf(stderr, "failed to init io_uring: %s\n", strerror(-ret)); return ret; } if (pipe(fds)) { perror("pipe"); return -1; } /* * Expect register to fail as it doesn't support io_uring fds, shouldn't * leave any fds referenced afterwards. 
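 * The ring's own fd in slot 3 forces the failure part-way through; the
 * read from fds[0] below only returns (with EOF) if the aborted
 * registration really dropped its reference to the closed fds[1].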
*/ reg_fds[0] = fds[0]; reg_fds[1] = fds[1]; reg_fds[2] = -1; reg_fds[3] = ring.ring_fd; reg_fds[4] = -1; ret = io_uring_register_files(&ring, reg_fds, 5); if (!ret) { fprintf(stderr, "file_register unexpectedly succeeded\n"); return 1; } /* ring should have fds referenced, can close them */ close(fds[1]); /* confirm that fds[1] is actually close and to ref'ed by io_uring */ ret = read(fds[0], buffer, 10); if (ret < 0) perror("read"); close(fds[0]); io_uring_queue_exit(&ring); return 0; } static int file_update_alloc(struct io_uring *ring, int *fd) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(ring); io_uring_prep_files_update(sqe, fd, 1, IORING_FILE_INDEX_ALLOC); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return -1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: io_uring_wait_cqe=%d\n", __FUNCTION__, ret); return -1; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; } static int test_out_of_range_file_ranges(struct io_uring *ring) { int ret; ret = io_uring_register_file_alloc_range(ring, 8, 3); if (ret != -EINVAL) { fprintf(stderr, "overlapping range %i\n", ret); return 1; } ret = io_uring_register_file_alloc_range(ring, 10, 1); if (ret != -EINVAL) { fprintf(stderr, "out of range index %i\n", ret); return 1; } ret = io_uring_register_file_alloc_range(ring, 7, ~1U); if (ret != -EOVERFLOW) { fprintf(stderr, "overflow %i\n", ret); return 1; } return 0; } static int test_overallocating_file_range(struct io_uring *ring, int fds[2]) { int roff = 7, rlen = 2; int ret, i, fd; ret = io_uring_register_file_alloc_range(ring, roff, rlen); if (ret) { fprintf(stderr, "io_uring_register_file_alloc_range %i\n", ret); return 1; } for (i = 0; i < rlen; i++) { fd = fds[0]; ret = file_update_alloc(ring, &fd); if (ret != 1) { fprintf(stderr, "file_update_alloc\n"); return 1; } if (fd < roff || fd >= roff + rlen) { fprintf(stderr, "invalid off result %i\n", fd); return 1; } } fd = fds[0]; ret = file_update_alloc(ring, &fd); if (ret != -ENFILE) { fprintf(stderr, "overallocated %i, off %i\n", ret, fd); return 1; } return 0; } static int test_zero_range_alloc(struct io_uring *ring, int fds[2]) { int ret, fd; ret = io_uring_register_file_alloc_range(ring, 7, 0); if (ret) { fprintf(stderr, "io_uring_register_file_alloc_range failed %i\n", ret); return 1; } fd = fds[0]; ret = file_update_alloc(ring, &fd); if (ret != -ENFILE) { fprintf(stderr, "zero alloc %i\n", ret); return 1; } return 0; } static int test_defer_taskrun(void) { struct io_uring_sqe *sqe; struct io_uring ring; int ret, fds[2]; char buff = 'x'; ret = io_uring_queue_init(8, &ring, IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_SINGLE_ISSUER); if (ret) { fprintf(stderr, "ring init\n"); return 1; } ret = pipe(fds); if (ret) { fprintf(stderr, "bad pipes\n"); return 1; } ret = io_uring_register_files(&ring, &fds[0], 2); if (ret) { fprintf(stderr, "bad register %d\n", ret); return 1; } sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, 0, &buff, 1, 0); sqe->flags |= IOSQE_FIXED_FILE; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "bad submit\n"); return 1; } ret = write(fds[1], &buff, 1); if (ret != 1) { fprintf(stderr, "bad pipe write\n"); return 1; } ret = io_uring_unregister_files(&ring); if (ret) { fprintf(stderr, "bad unregister %d\n", ret); return 1; } close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring); return 0; } static int test_file_alloc_ranges(void) { struct io_uring ring; int ret, 
pipe_fds[2]; if (pipe(pipe_fds)) { fprintf(stderr, "pipes\n"); return 1; } ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "queue_init: %d\n", ret); return 1; } ret = io_uring_register_files_sparse(&ring, 10); if (ret == -EINVAL) { not_supported: close(pipe_fds[0]); close(pipe_fds[1]); io_uring_queue_exit(&ring); printf("file alloc ranges are not supported, skip\n"); return 0; } else if (ret) { fprintf(stderr, "io_uring_register_files_sparse %i\n", ret); return ret; } ret = io_uring_register_file_alloc_range(&ring, 0, 1); if (ret) { if (ret == -EINVAL) goto not_supported; fprintf(stderr, "io_uring_register_file_alloc_range %i\n", ret); return 1; } ret = test_overallocating_file_range(&ring, pipe_fds); if (ret) { fprintf(stderr, "test_overallocating_file_range() failed\n"); return 1; } ret = test_out_of_range_file_ranges(&ring); if (ret) { fprintf(stderr, "test_out_of_range_file_ranges() failed\n"); return 1; } ret = test_zero_range_alloc(&ring, pipe_fds); if (ret) { fprintf(stderr, "test_zero_range_alloc() failed\n"); return 1; } close(pipe_fds[0]); close(pipe_fds[1]); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } ret = test_basic(&ring, 0); if (ret) { fprintf(stderr, "test_basic failed\n"); return T_EXIT_FAIL; } ret = test_basic(&ring, 1); if (ret) { fprintf(stderr, "test_basic failed\n"); return T_EXIT_FAIL; } ret = test_basic_many(&ring); if (ret) { fprintf(stderr, "test_basic_many failed\n"); return T_EXIT_FAIL; } ret = test_sparse(&ring); if (ret) { fprintf(stderr, "test_sparse failed\n"); return T_EXIT_FAIL; } if (no_update) return T_EXIT_SKIP; ret = test_additions(&ring); if (ret) { fprintf(stderr, "test_additions failed\n"); return T_EXIT_FAIL; } ret = test_removals(&ring); if (ret) { fprintf(stderr, "test_removals failed\n"); return T_EXIT_FAIL; } ret = test_replace(&ring); if (ret) { fprintf(stderr, "test_replace failed\n"); return T_EXIT_FAIL; } ret = test_replace_all(&ring); if (ret) { fprintf(stderr, "test_replace_all failed\n"); return T_EXIT_FAIL; } ret = test_grow(&ring); if (ret) { fprintf(stderr, "test_grow failed\n"); return T_EXIT_FAIL; } ret = test_shrink(&ring); if (ret) { fprintf(stderr, "test_shrink failed\n"); return T_EXIT_FAIL; } ret = test_zero(&ring); if (ret) { fprintf(stderr, "test_zero failed\n"); return T_EXIT_FAIL; } ret = test_huge(&ring); if (ret) { fprintf(stderr, "test_huge failed\n"); return T_EXIT_FAIL; } ret = test_skip(&ring); if (ret) { fprintf(stderr, "test_skip failed\n"); return T_EXIT_FAIL; } ret = test_sparse_updates(); if (ret) { fprintf(stderr, "test_sparse_updates failed\n"); return T_EXIT_FAIL; } ret = test_fixed_removal_ordering(); if (ret) { fprintf(stderr, "test_fixed_removal_ordering failed\n"); return T_EXIT_FAIL; } ret = test_mixed_af_unix(); if (ret) { fprintf(stderr, "test_mixed_af_unix failed\n"); return T_EXIT_FAIL; } ret = test_partial_register_fail(); if (ret) { fprintf(stderr, "test_partial_register_fail failed\n"); return T_EXIT_FAIL; } ret = test_file_alloc_ranges(); if (ret) { fprintf(stderr, "test_partial_register_fail failed\n"); return T_EXIT_FAIL; } if (t_probe_defer_taskrun()) { ret = test_defer_taskrun(); if (ret) { fprintf(stderr, "test_defer_taskrun failed\n"); return T_EXIT_FAIL; } } return T_EXIT_PASS; } 
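/*
 * For reference, the bare fixed-file lifecycle that the tests above
 * exercise, as a minimal sketch with all error handling omitted (the
 * ".reg.tmp" path is purely illustrative):
 *
 *	struct io_uring ring;
 *	int fd = open(".reg.tmp", O_RDWR | O_CREAT, 0644);
 *
 *	io_uring_queue_init(8, &ring, 0);
 *	io_uring_register_files(&ring, &fd, 1);		  // fd now in slot 0
 *	fd = -1;
 *	io_uring_register_files_update(&ring, 0, &fd, 1); // clear slot 0
 *	io_uring_unregister_files(&ring);
 *	io_uring_queue_exit(&ring);
 */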
liburing-2.6/test/file-update.c000066400000000000000000000112561461424365000165300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various file registration tests * */ #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static void close_files(int *files, int nr_files, int add) { char fname[32]; int i; for (i = 0; i < nr_files; i++) { if (files) close(files[i]); if (!add) sprintf(fname, ".reg.%d", i); else sprintf(fname, ".add.%d", i + add); unlink(fname); } if (files) free(files); } static int *open_files(int nr_files, int extra, int add) { char fname[32]; int *files; int i; files = t_calloc(nr_files + extra, sizeof(int)); for (i = 0; i < nr_files; i++) { if (!add) sprintf(fname, ".reg.%d", i); else sprintf(fname, ".add.%d", i + add); files[i] = open(fname, O_RDWR | O_CREAT, 0644); if (files[i] < 0) { perror("open"); free(files); files = NULL; break; } } if (extra) { for (i = nr_files; i < nr_files + extra; i++) files[i] = -1; } return files; } static int test_update_multiring(struct io_uring *r1, struct io_uring *r2, struct io_uring *r3, int do_unreg) { int *fds, *newfds; fds = open_files(10, 0, 0); newfds = open_files(10, 0, 1); if (io_uring_register_files(r1, fds, 10) || io_uring_register_files(r2, fds, 10) || io_uring_register_files(r3, fds, 10)) { fprintf(stderr, "%s: register files failed\n", __FUNCTION__); goto err; } if (io_uring_register_files_update(r1, 0, newfds, 10) != 10 || io_uring_register_files_update(r2, 0, newfds, 10) != 10 || io_uring_register_files_update(r3, 0, newfds, 10) != 10) { fprintf(stderr, "%s: update files failed\n", __FUNCTION__); goto err; } if (!do_unreg) goto done; if (io_uring_unregister_files(r1) || io_uring_unregister_files(r2) || io_uring_unregister_files(r3)) { fprintf(stderr, "%s: unregister files failed\n", __FUNCTION__); goto err; } done: close_files(fds, 10, 0); close_files(newfds, 10, 1); return 0; err: close_files(fds, 10, 0); close_files(newfds, 10, 1); return 1; } static int test_sqe_update(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int *fds, i, ret; fds = t_malloc(sizeof(int) * 10); for (i = 0; i < 10; i++) fds[i] = -1; sqe = io_uring_get_sqe(ring); io_uring_prep_files_update(sqe, fds, 10, 0); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return 1; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); free(fds); if (ret == -EINVAL) { fprintf(stdout, "IORING_OP_FILES_UPDATE not supported, skipping\n"); return T_EXIT_SKIP; } return ret != 10; } static int test_update_no_table(void) { int up_fd, fds[4] = {-1, 0, 1, 4}; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret; ret = t_create_ring(2, &ring, 0); if (ret == T_SETUP_SKIP) return T_EXIT_SKIP; else if (ret != T_SETUP_OK) return ret; ret = io_uring_register_files(&ring, fds, 4); /* ignore other failures */ if (ret && ret != -EBADF) { fprintf(stderr, "Failed registering file table: %d\n", ret); goto fail; } sqe = io_uring_get_sqe(&ring); up_fd = ring.ring_fd; io_uring_prep_files_update(sqe, &up_fd, 1, -1); //offset = -1 ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "Failed submit: %d\n", ret); goto fail; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "Failed wait: %d\n", ret); goto fail; } ret = cqe->res; io_uring_cqe_seen(&ring, cqe); if (ret != -EMFILE && ret != -EINVAL && ret != 
-EOVERFLOW && ret != -ENXIO) { fprintf(stderr, "Bad cqe res: %d\n", ret); goto fail; } io_uring_queue_exit(&ring); return T_EXIT_PASS; fail: io_uring_queue_exit(&ring); return T_EXIT_FAIL; } int main(int argc, char *argv[]) { struct io_uring r1, r2, r3; int ret; if (argc > 1) return T_EXIT_SKIP; if (io_uring_queue_init(8, &r1, 0) || io_uring_queue_init(8, &r2, 0) || io_uring_queue_init(8, &r3, 0)) { fprintf(stderr, "ring setup failed\n"); return 1; } ret = test_update_multiring(&r1, &r2, &r3, 1); if (ret) { fprintf(stderr, "test_update_multiring w/unreg\n"); return ret; } ret = test_update_multiring(&r1, &r2, &r3, 0); if (ret) { fprintf(stderr, "test_update_multiring wo/unreg\n"); return ret; } ret = test_sqe_update(&r1); if (ret) { if (ret != T_EXIT_SKIP) fprintf(stderr, "test_sqe_update failed\n"); return ret; } ret = test_update_no_table(); if (ret) { if (ret != T_EXIT_SKIP) fprintf(stderr, "test_sqe_update failed\n"); return ret; } return T_EXIT_PASS; } liburing-2.6/test/file-verify.c000066400000000000000000000316331461424365000165530ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various reads tests, verifying data * */ #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FSIZE 128*1024*1024 #define CHUNK_SIZE 131072 #define PUNCH_SIZE 32768 /* * 8 because it fits within the on-stack iov, 16 because it's larger than 8 */ #define MIN_VECS 8 #define MAX_VECS 16 /* * Can be anything, let's just do something for a bit of parallelism */ #define READ_BATCH 16 static void verify_buf_sync(void *buf, size_t size, bool registered) { #if defined(__hppa__) if (registered) { unsigned long off = (unsigned long) buf & 4095; unsigned long p = (unsigned long) buf & ~4095; int i; size += off; for (i = 0; i < size; i += 32) asm volatile("fdc 0(%0)" : : "r" (p + i)); } #endif } /* * Each offset in the file has the offset / sizeof(int) stored for every * sizeof(int) address. 
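 * In other words, the unsigned int at byte offset N holds the value
 * N / sizeof(int), which is exactly what fill_pattern() writes;
 * verify_buf() checks a read buffer against that sequence starting at
 * the buffer's file offset.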
*/ static int verify_buf(void *buf, size_t size, off_t off, bool registered) { int i, u_in_buf = size / sizeof(unsigned int); unsigned int *ptr; verify_buf_sync(buf, size, registered); off /= sizeof(unsigned int); ptr = buf; for (i = 0; i < u_in_buf; i++) { if (off != *ptr) { fprintf(stderr, "Found %u, wanted %llu\n", *ptr, (unsigned long long) off); return 1; } ptr++; off++; } return 0; } static int test_truncate(struct io_uring *ring, const char *fname, int buffered, int vectored, int provide_buf) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct iovec vec; struct stat sb; off_t punch_off, off, file_size; void *buf = NULL; int u_in_buf, i, ret, fd, first_pass = 1; unsigned int *ptr; if (buffered) fd = open(fname, O_RDWR); else fd = open(fname, O_DIRECT | O_RDWR); if (fd < 0) { perror("open"); return 1; } if (fstat(fd, &sb) < 0) { perror("stat"); close(fd); return 1; } if (S_ISREG(sb.st_mode)) { file_size = sb.st_size; } else if (S_ISBLK(sb.st_mode)) { unsigned long long bytes; if (ioctl(fd, BLKGETSIZE64, &bytes) < 0) { perror("ioctl"); close(fd); return 1; } file_size = bytes; } else { goto out; } if (file_size < CHUNK_SIZE) goto out; t_posix_memalign(&buf, 4096, CHUNK_SIZE); off = file_size - (CHUNK_SIZE / 2); punch_off = off + CHUNK_SIZE / 4; u_in_buf = CHUNK_SIZE / sizeof(unsigned int); ptr = buf; for (i = 0; i < u_in_buf; i++) { *ptr = i; ptr++; } ret = pwrite(fd, buf, CHUNK_SIZE / 2, off); if (ret < 0) { perror("pwrite"); goto err; } else if (ret != CHUNK_SIZE / 2) goto out; again: /* * Read in last bit of file so it's known cached, then remove half of that * last bit so we get a short read that needs retry */ ret = pread(fd, buf, CHUNK_SIZE / 2, off); if (ret < 0) { perror("pread"); goto err; } else if (ret != CHUNK_SIZE / 2) goto out; if (posix_fadvise(fd, punch_off, CHUNK_SIZE / 4, POSIX_FADV_DONTNEED) < 0) { perror("posix_fadivse"); goto err; } if (provide_buf) { sqe = io_uring_get_sqe(ring); io_uring_prep_provide_buffers(sqe, buf, CHUNK_SIZE, 1, 0, 0); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "submit failed %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret) { fprintf(stderr, "Provide buffer failed %d\n", ret); goto err; } } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } if (vectored) { assert(!provide_buf); vec.iov_base = buf; vec.iov_len = CHUNK_SIZE; io_uring_prep_readv(sqe, fd, &vec, 1, off); } else { if (provide_buf) { io_uring_prep_read(sqe, fd, NULL, CHUNK_SIZE, off); sqe->flags |= IOSQE_BUFFER_SELECT; } else { io_uring_prep_read(sqe, fd, buf, CHUNK_SIZE, off); } } memset(buf, 0, CHUNK_SIZE); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "Submit failed %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret != CHUNK_SIZE / 2) { fprintf(stderr, "Unexpected truncated read %d\n", ret); goto err; } if (verify_buf(buf, CHUNK_SIZE / 2, 0, false)) goto err; /* * Repeat, but punch first part instead of last */ if (first_pass) { punch_off = file_size - CHUNK_SIZE / 4; first_pass = 0; goto again; } out: free(buf); close(fd); return 0; err: free(buf); close(fd); return 1; } enum { PUNCH_NONE, PUNCH_FRONT, PUNCH_MIDDLE, PUNCH_END, }; /* * For each chunk in file, DONTNEED a start, end, or middle segment of it. 
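 * PUNCH_SIZE is a quarter of CHUNK_SIZE, so each punch drops 32KB of
 * page cache from the front, middle, or end of a 128KB chunk, or
 * leaves the chunk intact when PUNCH_NONE is drawn.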
* We enter here with the file fully cached every time, either freshly * written or after other reads. This forces (at least) the buffered reads * to be handled incrementally, exercising that path. */ static int do_punch(int fd) { off_t offset = 0; int punch_type; while (offset + CHUNK_SIZE <= FSIZE) { off_t punch_off; punch_type = rand() % (PUNCH_END + 1); switch (punch_type) { default: case PUNCH_NONE: punch_off = -1; /* gcc... */ break; case PUNCH_FRONT: punch_off = offset; break; case PUNCH_MIDDLE: punch_off = offset + PUNCH_SIZE; break; case PUNCH_END: punch_off = offset + CHUNK_SIZE - PUNCH_SIZE; break; } offset += CHUNK_SIZE; if (punch_type == PUNCH_NONE) continue; if (posix_fadvise(fd, punch_off, PUNCH_SIZE, POSIX_FADV_DONTNEED) < 0) { perror("posix_fadivse"); return 1; } } return 0; } static int provide_buffers(struct io_uring *ring, void **buf) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int i, ret; /* real use case would have one buffer chopped up, but... */ for (i = 0; i < READ_BATCH; i++) { sqe = io_uring_get_sqe(ring); io_uring_prep_provide_buffers(sqe, buf[i], CHUNK_SIZE, 1, 0, i); } ret = io_uring_submit(ring); if (ret != READ_BATCH) { fprintf(stderr, "Submit failed %d\n", ret); return 1; } for (i = 0; i < READ_BATCH; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "cqe res provide %d\n", cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); } return 0; } static int test(struct io_uring *ring, const char *fname, int buffered, int vectored, int small_vecs, int registered, int provide) { struct iovec vecs[READ_BATCH][MAX_VECS]; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; void *buf[READ_BATCH]; int ret, fd, flags; int i, j, nr_vecs; off_t off, voff; size_t left; if (registered) { assert(!provide); assert(!vectored && !small_vecs); } if (provide) { assert(!registered); assert(!vectored && !small_vecs); } flags = O_RDONLY; if (!buffered) flags |= O_DIRECT; fd = open(fname, flags); if (fd < 0) { perror("open"); return 1; } if (do_punch(fd)) return 1; if (vectored) { if (small_vecs) nr_vecs = MIN_VECS; else nr_vecs = MAX_VECS; for (j = 0; j < READ_BATCH; j++) { for (i = 0; i < nr_vecs; i++) { void *ptr; t_posix_memalign(&ptr, 4096, CHUNK_SIZE / nr_vecs); vecs[j][i].iov_base = ptr; vecs[j][i].iov_len = CHUNK_SIZE / nr_vecs; } } } else { for (j = 0; j < READ_BATCH; j++) t_posix_memalign(&buf[j], 4096, CHUNK_SIZE); nr_vecs = 0; } if (registered) { struct iovec v[READ_BATCH]; for (i = 0; i < READ_BATCH; i++) { v[i].iov_base = buf[i]; v[i].iov_len = CHUNK_SIZE; } ret = t_register_buffers(ring, v, READ_BATCH); if (ret) { if (ret == T_SETUP_SKIP) { ret = 0; goto free_bufs; } goto err; } } i = 0; left = FSIZE; off = 0; while (left) { int pending = 0; if (provide && provide_buffers(ring, buf)) goto err; for (i = 0; i < READ_BATCH; i++) { size_t this = left; if (this > CHUNK_SIZE) this = CHUNK_SIZE; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } if (vectored) { io_uring_prep_readv(sqe, fd, vecs[i], nr_vecs, off); } else { if (registered) { io_uring_prep_read_fixed(sqe, fd, buf[i], this, off, i); } else if (provide) { io_uring_prep_read(sqe, fd, NULL, this, off); sqe->flags |= IOSQE_BUFFER_SELECT; } else { io_uring_prep_read(sqe, fd, buf[i], this, off); } } sqe->user_data = ((uint64_t)off << 32) | i; off += this; left -= this; pending++; if (!left) break; } ret = io_uring_submit(ring); if (ret != pending) { fprintf(stderr, "sqe submit 
failed: %d\n", ret); goto err; } for (i = 0; i < pending; i++) { int index; ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res < 0) { fprintf(stderr, "bad read %d, read %d\n", cqe->res, i); goto err; } if (cqe->res < CHUNK_SIZE) { fprintf(stderr, "short read %d, read %d\n", cqe->res, i); goto err; } if (cqe->flags & IORING_CQE_F_BUFFER) index = cqe->flags >> 16; else index = cqe->user_data & 0xffffffff; voff = cqe->user_data >> 32; io_uring_cqe_seen(ring, cqe); if (vectored) { for (j = 0; j < nr_vecs; j++) { void *buf = vecs[index][j].iov_base; size_t len = vecs[index][j].iov_len; if (verify_buf(buf, len, voff, registered)) goto err; voff += len; } } else { if (verify_buf(buf[index], CHUNK_SIZE, voff, registered)) goto err; } } } ret = 0; done: if (registered) io_uring_unregister_buffers(ring); free_bufs: if (vectored) { for (j = 0; j < READ_BATCH; j++) for (i = 0; i < nr_vecs; i++) free(vecs[j][i].iov_base); } else { for (j = 0; j < READ_BATCH; j++) free(buf[j]); } close(fd); return ret; err: ret = 1; goto done; } static int fill_pattern(const char *fname) { size_t left = FSIZE; unsigned int val, *ptr; void *buf; int fd, i; fd = open(fname, O_WRONLY); if (fd < 0) { perror("open"); return 1; } val = 0; buf = t_malloc(4096); while (left) { int u_in_buf = 4096 / sizeof(val); size_t this = left; if (this > 4096) this = 4096; ptr = buf; for (i = 0; i < u_in_buf; i++) { *ptr = val; val++; ptr++; } if (write(fd, buf, 4096) != 4096) return 1; left -= 4096; } fsync(fd); close(fd); free(buf); return 0; } int main(int argc, char *argv[]) { struct io_uring ring; const char *fname; char buf[32]; int ret; srand(getpid()); if (argc > 1) { fname = argv[1]; } else { sprintf(buf, ".file-verify.%d", getpid()); fname = buf; t_create_file(fname, FSIZE); } ret = io_uring_queue_init(READ_BATCH, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); goto err; } if (fill_pattern(fname)) goto err; ret = test(&ring, fname, 1, 0, 0, 0, 0); if (ret) { fprintf(stderr, "Buffered novec test failed\n"); goto err; } ret = test(&ring, fname, 1, 0, 0, 1, 0); if (ret) { fprintf(stderr, "Buffered novec reg test failed\n"); goto err; } ret = test(&ring, fname, 1, 0, 0, 0, 1); if (ret) { fprintf(stderr, "Buffered novec provide test failed\n"); goto err; } ret = test(&ring, fname, 1, 1, 0, 0, 0); if (ret) { fprintf(stderr, "Buffered vec test failed\n"); goto err; } ret = test(&ring, fname, 1, 1, 1, 0, 0); if (ret) { fprintf(stderr, "Buffered small vec test failed\n"); goto err; } ret = test(&ring, fname, 0, 0, 0, 0, 0); if (ret) { fprintf(stderr, "O_DIRECT novec test failed\n"); goto err; } ret = test(&ring, fname, 0, 0, 0, 1, 0); if (ret) { fprintf(stderr, "O_DIRECT novec reg test failed\n"); goto err; } ret = test(&ring, fname, 0, 0, 0, 0, 1); if (ret) { fprintf(stderr, "O_DIRECT novec provide test failed\n"); goto err; } ret = test(&ring, fname, 0, 1, 0, 0, 0); if (ret) { fprintf(stderr, "O_DIRECT vec test failed\n"); goto err; } ret = test(&ring, fname, 0, 1, 1, 0, 0); if (ret) { fprintf(stderr, "O_DIRECT small vec test failed\n"); goto err; } ret = test_truncate(&ring, fname, 1, 0, 0); if (ret) { fprintf(stderr, "Buffered end truncate read failed\n"); goto err; } ret = test_truncate(&ring, fname, 1, 1, 0); if (ret) { fprintf(stderr, "Buffered end truncate vec read failed\n"); goto err; } ret = test_truncate(&ring, fname, 1, 0, 1); if (ret) { fprintf(stderr, "Buffered end truncate pbuf read failed\n"); goto err; } ret = 
test_truncate(&ring, fname, 0, 0, 0); if (ret) { fprintf(stderr, "O_DIRECT end truncate read failed\n"); goto err; } ret = test_truncate(&ring, fname, 0, 1, 0); if (ret) { fprintf(stderr, "O_DIRECT end truncate vec read failed\n"); goto err; } ret = test_truncate(&ring, fname, 0, 0, 1); if (ret) { fprintf(stderr, "O_DIRECT end truncate pbuf read failed\n"); goto err; } if (buf == fname) unlink(fname); return T_EXIT_PASS; err: if (buf == fname) unlink(fname); return T_EXIT_FAIL; } liburing-2.6/test/files-exit-hang-poll.c000066400000000000000000000044151461424365000202600ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Based on a test case from Josef Grieb - test that we can exit without * hanging if we have the task file table pinned by a request that is linked * to another request that doesn't finish. */ #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define BACKLOG 512 static struct io_uring ring; static void add_poll(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); io_uring_prep_poll_add(sqe, fd, POLLIN); sqe->flags |= IOSQE_IO_LINK; } static void add_accept(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); io_uring_prep_accept(sqe, fd, 0, 0, SOCK_NONBLOCK | SOCK_CLOEXEC); } static int setup_io_uring(void) { int ret; ret = io_uring_queue_init(16, &ring, 0); if (ret) { fprintf(stderr, "Unable to setup io_uring: %s\n", strerror(-ret)); return 1; } return 0; } static void alarm_sig(int sig) { exit(0); } int main(int argc, char *argv[]) { struct sockaddr_in serv_addr; struct io_uring_cqe *cqe; int ret, sock_listen_fd; const int val = 1; if (argc > 1) return T_EXIT_SKIP; sock_listen_fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); if (sock_listen_fd < 0) { perror("socket"); return T_EXIT_FAIL; } setsockopt(sock_listen_fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); memset(&serv_addr, 0, sizeof(serv_addr)); serv_addr.sin_family = AF_INET; serv_addr.sin_addr.s_addr = INADDR_ANY; if (t_bind_ephemeral_port(sock_listen_fd, &serv_addr)) { perror("bind"); return T_EXIT_FAIL; } if (listen(sock_listen_fd, BACKLOG) < 0) { perror("Error listening on socket\n"); return T_EXIT_FAIL; } if (setup_io_uring()) return T_EXIT_FAIL; add_poll(&ring, sock_listen_fd); add_accept(&ring, sock_listen_fd); ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "submit=%d\n", ret); return T_EXIT_FAIL; } signal(SIGALRM, alarm_sig); alarm(1); ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); return T_EXIT_PASS; } liburing-2.6/test/files-exit-hang-timeout.c000066400000000000000000000053421461424365000210000ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Based on a test case from Josef Grieb - test that we can exit without * hanging if we have the task file table pinned by a request that is linked * to another request that doesn't finish. 
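 * Same scenario as files-exit-hang-poll.c, except the request that
 * never completes is a five-minute timeout instead of a poll; the
 * alarm(1) handler supplies the clean exit, so passing simply means
 * the exit path did not hang.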
*/ #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define BACKLOG 512 #define PORT 9100 static struct io_uring ring; static struct __kernel_timespec ts = { .tv_sec = 300, .tv_nsec = 0, }; static void add_timeout(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); io_uring_prep_timeout(sqe, &ts, 100, 0); sqe->flags |= IOSQE_IO_LINK; } static void add_accept(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); io_uring_prep_accept(sqe, fd, 0, 0, SOCK_NONBLOCK | SOCK_CLOEXEC); sqe->flags |= IOSQE_IO_LINK; } static int setup_io_uring(void) { int ret; ret = io_uring_queue_init(16, &ring, 0); if (ret) { fprintf(stderr, "Unable to setup io_uring: %s\n", strerror(-ret)); return 1; } return 0; } static void alarm_sig(int sig) { exit(0); } int main(int argc, char *argv[]) { struct sockaddr_in serv_addr; struct io_uring_cqe *cqe; int ret, sock_listen_fd; const int val = 1; int i; if (argc > 1) return T_EXIT_SKIP; sock_listen_fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); if (sock_listen_fd < 0) { perror("socket"); return T_EXIT_FAIL; } setsockopt(sock_listen_fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); memset(&serv_addr, 0, sizeof(serv_addr)); serv_addr.sin_family = AF_INET; serv_addr.sin_addr.s_addr = INADDR_ANY; for (i = 0; i < 100; i++) { serv_addr.sin_port = htons(PORT + i); ret = bind(sock_listen_fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)); if (!ret) break; if (errno != EADDRINUSE) { fprintf(stderr, "bind: %s\n", strerror(errno)); return T_EXIT_FAIL; } if (i == 99) { printf("Gave up on finding a port, skipping\n"); goto skip; } } if (listen(sock_listen_fd, BACKLOG) < 0) { perror("Error listening on socket\n"); return T_EXIT_FAIL; } if (setup_io_uring()) return T_EXIT_FAIL; add_timeout(&ring, sock_listen_fd); add_accept(&ring, sock_listen_fd); ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "submit=%d\n", ret); return T_EXIT_FAIL; } signal(SIGALRM, alarm_sig); alarm(1); ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); return T_EXIT_PASS; skip: io_uring_queue_exit(&ring); return T_EXIT_SKIP; } liburing-2.6/test/fixed-buf-iter.c000066400000000000000000000045151461424365000171430ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test fixed buffers with non-iterators. 
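 * The test pushes a single registered buffer through a read from
 * /dev/urandom and a write to /dev/zero, character devices whose I/O
 * paths do not consume the request as an iterator.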
* * Taken from: https://github.com/axboe/liburing/issues/549 */ #include #include #include #include #include "liburing.h" #include "helpers.h" #define BUF_SIZE 4096 #define BUFFERS 1 #define IN_FD "/dev/urandom" #define OUT_FD "/dev/zero" static int test(struct io_uring *ring) { struct iovec iov[BUFFERS]; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, fd_in, fd_out, i; fd_in = open(IN_FD, O_RDONLY, 0644); if (fd_in < 0) { perror("open in"); return 1; } fd_out = open(OUT_FD, O_RDWR, 0644); if (fd_out < 0) { perror("open out"); return 1; } for (i = 0; i < BUFFERS; i++) { iov[i].iov_base = malloc(BUF_SIZE); iov[i].iov_len = BUF_SIZE; memset(iov[i].iov_base, 0, BUF_SIZE); } ret = io_uring_register_buffers(ring, iov, BUFFERS); if (ret) { fprintf(stderr, "Error registering buffers: %s", strerror(-ret)); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "Could not get SQE.\n"); return 1; } io_uring_prep_read_fixed(sqe, fd_in, iov[0].iov_base, BUF_SIZE, 0, 0); io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "Error waiting for completion: %s\n", strerror(-ret)); return 1; } if (cqe->res < 0) { fprintf(stderr, "Error in async operation: %s\n", strerror(-cqe->res)); return 1; } io_uring_cqe_seen(ring, cqe); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "Could not get SQE.\n"); return 1; } io_uring_prep_write_fixed(sqe, fd_out, iov[0].iov_base, BUF_SIZE, 0, 0); io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "Error waiting for completion: %s\n", strerror(-ret)); return 1; } if (cqe->res < 0) { fprintf(stderr, "Error in async operation: %s\n", strerror(-cqe->res)); return 1; } io_uring_cqe_seen(ring, cqe); return 0; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = t_create_ring(8, &ring, 0); if (ret == T_SETUP_SKIP) return T_EXIT_SKIP; else if (ret < 0) return T_EXIT_FAIL; ret = test(&ring); if (ret) { fprintf(stderr, "Test failed\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); return T_EXIT_PASS; } liburing-2.6/test/fixed-buf-merge.c000066400000000000000000000037451461424365000173030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test fixed buffer merging/skipping * * Taken from: https://github.com/axboe/liburing/issues/994 * */ #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { int ret, i, fd, initial_offset = 4096, num_requests = 3; struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec iov; char *buffer, *to_free; unsigned head; char filename[64]; ret = io_uring_queue_init(4, &ring, 0); if (ret) { fprintf(stderr, "queue_init: %d\n", ret); return T_EXIT_FAIL; } sprintf(filename, ".fixed-buf-%d", getpid()); t_create_file(filename, 4 * 4096); fd = open(filename, O_RDONLY | O_DIRECT, 0644); if (fd < 0) { perror("open"); goto err_unlink; } to_free = buffer = aligned_alloc(4096, 128 * 4096); if (!buffer) { perror("aligned_alloc"); goto err_unlink; } /* Register buffer */ iov.iov_base = buffer; iov.iov_len = 128 * 4096; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) { fprintf(stderr, "buf register: %d\n", ret); goto err_unlink; } /* Prepare read requests */ buffer += initial_offset; for (i = 0; i < num_requests; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_read_fixed(sqe, fd, buffer, 4096, 4096 * i, 0); buffer += 4096; } /* Submit requests and reap completions */ ret = 
io_uring_submit_and_wait(&ring, num_requests); if (ret != num_requests) { fprintf(stderr, "Submit and wait: %d\n", ret); goto err_unlink; } i = 0; io_uring_for_each_cqe(&ring, head, cqe) { if (cqe->res != 4096) { fprintf(stderr, "cqe: %d\n", cqe->res); goto err_unlink; } i++; } if (i != num_requests) { fprintf(stderr, "Got %d completions\n", i); goto err_unlink; } io_uring_cq_advance(&ring, i); io_uring_queue_exit(&ring); close(fd); free(to_free); unlink(filename); return T_EXIT_PASS; err_unlink: unlink(filename); return T_EXIT_FAIL; } liburing-2.6/test/fixed-link.c000066400000000000000000000037321461424365000163630ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define IOVECS_LEN 2 int main(int argc, char *argv[]) { struct iovec iovecs[IOVECS_LEN]; struct io_uring ring; int i, fd, ret; if (argc > 1) return T_EXIT_SKIP; fd = open("/dev/zero", O_RDONLY); if (fd < 0) { fprintf(stderr, "Failed to open /dev/zero\n"); return T_EXIT_FAIL; } if (io_uring_queue_init(32, &ring, 0) < 0) { fprintf(stderr, "Failed to init io_uring\n"); close(fd); return T_EXIT_FAIL; } for (i = 0; i < IOVECS_LEN; ++i) { iovecs[i].iov_base = t_malloc(64); iovecs[i].iov_len = 64; } ret = io_uring_register_buffers(&ring, iovecs, IOVECS_LEN); if (ret) { fprintf(stderr, "Failed to register buffers\n"); return T_EXIT_FAIL; } for (i = 0; i < IOVECS_LEN; ++i) { struct io_uring_sqe *sqe = io_uring_get_sqe(&ring); const char *str = "#include "; iovecs[i].iov_len = strlen(str); io_uring_prep_read_fixed(sqe, fd, iovecs[i].iov_base, strlen(str), 0, i); if (i == 0) io_uring_sqe_set_flags(sqe, IOSQE_IO_LINK); io_uring_sqe_set_data(sqe, (void *)str); } ret = io_uring_submit_and_wait(&ring, IOVECS_LEN); if (ret < 0) { fprintf(stderr, "Failed to submit IO\n"); return T_EXIT_FAIL; } else if (ret < 2) { fprintf(stderr, "Submitted %d, wanted %d\n", ret, IOVECS_LEN); return T_EXIT_FAIL; } for (i = 0; i < IOVECS_LEN; i++) { struct io_uring_cqe *cqe; ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return T_EXIT_FAIL; } if (cqe->res != iovecs[i].iov_len) { fprintf(stderr, "read: wanted %ld, got %d\n", (long) iovecs[i].iov_len, cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(&ring, cqe); } close(fd); io_uring_queue_exit(&ring); for (i = 0; i < IOVECS_LEN; ++i) free(iovecs[i].iov_base); return T_EXIT_PASS; } liburing-2.6/test/fixed-reuse.c000066400000000000000000000064051461424365000165510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: link with an existing * file present in the opened slot, verifying that we get the new file * rather than the old one. 
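 *
 * The reuse works through direct descriptors: io_uring_prep_openat_direct()
 * installs the opened file straight into a fixed-file table slot, and a
 * read with IOSQE_FIXED_FILE set treats its "fd" argument as that slot
 * index (0 here). Opening into an already-occupied slot replaces the old
 * file. A sketch of the linked chain submitted below:
 *
 *	openat_direct(FNAME2 -> slot 0)  [IOSQE_IO_LINK]
 *	read(slot 0)                     [IOSQE_FIXED_FILE | IOSQE_IO_LINK]
 *	close_direct(slot 0)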
* */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define MAX_FILES 8 #define FNAME1 ".slot.reuse.1" #define FNAME2 ".slot.reuse.2" #define PAT1 0xaa #define PAT2 0x55 #define BSIZE 4096 static int test(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; char buf[BSIZE]; int ret, i; /* open FNAME1 in slot 0 */ sqe = io_uring_get_sqe(ring); io_uring_prep_openat_direct(sqe, AT_FDCWD, FNAME1, O_RDONLY, 0, 0); sqe->user_data = 1; ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res != 0) { fprintf(stderr, "open res %d\n", ret); goto err; } io_uring_cqe_seen(ring, cqe); /* * Now open FNAME2 in that same slot, verifying we get data from * FNAME2 and not FNAME1. */ sqe = io_uring_get_sqe(ring); io_uring_prep_openat_direct(sqe, AT_FDCWD, FNAME2, O_RDONLY, 0, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 2; sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, 0, buf, sizeof(buf), 0); sqe->flags |= IOSQE_FIXED_FILE; sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 3; sqe = io_uring_get_sqe(ring); io_uring_prep_close_direct(sqe, 0); sqe->user_data = 4; ret = io_uring_submit(ring); if (ret != 3) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 2: if (cqe->res) { fprintf(stderr, "bad open %d\n", cqe->res); goto err; } break; case 3: if (cqe->res != sizeof(buf)) { fprintf(stderr, "bad read %d\n", cqe->res); goto err; } break; case 4: if (cqe->res) { fprintf(stderr, "bad close %d\n", cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } for (i = 0; i < sizeof(buf); i++) { if (buf[i] == PAT2) continue; fprintf(stderr, "Bad pattern %x at %d\n", buf[i], i); goto err; } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; struct io_uring_params p = { }; int ret, files[MAX_FILES]; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } if (!(p.features & IORING_FEAT_CQE_SKIP)) return T_EXIT_SKIP; memset(files, -1, sizeof(files)); ret = io_uring_register_files(&ring, files, ARRAY_SIZE(files)); if (ret) { fprintf(stderr, "Failed registering files\n"); return T_EXIT_FAIL; } t_create_file_pattern(FNAME1, 4096, PAT1); t_create_file_pattern(FNAME2, 4096, PAT2); ret = test(&ring); if (ret) { fprintf(stderr, "test failed\n"); goto err; } unlink(FNAME1); unlink(FNAME2); return T_EXIT_PASS; err: unlink(FNAME1); unlink(FNAME2); return T_EXIT_FAIL; } liburing-2.6/test/fpos.c000066400000000000000000000127151461424365000153010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring fpos handling * */ #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE 5000 #define QUEUE_SIZE 2048 static void create_file(const char *file, size_t size) { ssize_t ret; char *buf; size_t idx; int fd; buf = t_malloc(size); for (idx = 0; idx < size; ++idx) { /* write 0 or 1 */ buf[idx] = (unsigned char)(idx & 0x01); } fd = open(file, O_WRONLY | O_CREAT, 0644); assert(fd >= 0); ret = write(fd, buf, size); fsync(fd); close(fd); free(buf); assert(ret == size); } 
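/*
 * Note on the requests queued below: they pass -1 as the offset, which
 * tells io_uring to use (and advance) the file's position, mirroring
 * plain read(2)/write(2) semantics. A minimal sketch of that usage
 * (illustration only, error checks omitted):
 *
 *	sqe = io_uring_get_sqe(ring);
 *	io_uring_prep_read(sqe, fd, buf, blocksize, -1);
 *	io_uring_submit_and_wait(ring, 1);
 *
 * The IOSQE_IO_LINK chains used in the tests keep those implicit f_pos
 * updates ordered between requests.
 */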
static int test_read(struct io_uring *ring, bool async, int blocksize) { int ret, fd, i; bool done = false; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; loff_t current, expected = 0; int count_ok; int count_0 = 0, count_1 = 0; unsigned char buff[QUEUE_SIZE * blocksize]; unsigned char reordered[QUEUE_SIZE * blocksize]; memset(buff, 0, QUEUE_SIZE * blocksize); memset(reordered, 0, QUEUE_SIZE * blocksize); create_file(".test_fpos_read", FILE_SIZE); fd = open(".test_fpos_read", O_RDONLY); unlink(".test_fpos_read"); assert(fd >= 0); while (!done) { for (i = 0; i < QUEUE_SIZE; ++i) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "no sqe\n"); return -1; } io_uring_prep_read(sqe, fd, buff + i * blocksize, blocksize, -1); sqe->user_data = i; if (async) sqe->flags |= IOSQE_ASYNC; if (i != QUEUE_SIZE - 1) sqe->flags |= IOSQE_IO_LINK; } ret = io_uring_submit_and_wait(ring, QUEUE_SIZE); if (ret != QUEUE_SIZE) { fprintf(stderr, "submit failed: %d\n", ret); return 1; } count_ok = 0; for (i = 0; i < QUEUE_SIZE; ++i) { int res; ret = io_uring_peek_cqe(ring, &cqe); if (ret) { fprintf(stderr, "peek failed: %d\n", ret); return ret; } assert(cqe->user_data < QUEUE_SIZE); memcpy(reordered + count_ok, buff + cqe->user_data * blocksize, blocksize); res = cqe->res; io_uring_cqe_seen(ring, cqe); if (res == 0) { done = true; } else if (res == -ECANCELED) { /* cancelled, probably ok */ } else if (res < 0 || res > blocksize) { fprintf(stderr, "bad read: %d\n", res); return -1; } else { expected += res; count_ok += res; } } ret = 0; for (i = 0; i < count_ok; i++) { if (reordered[i] == 1) { count_1++; } else if (reordered[i] == 0) { count_0++; } else { fprintf(stderr, "odd read %d\n", (int)reordered[i]); ret = -1; break; } } if (labs(count_1 - count_0) > 1) { fprintf(stderr, "inconsistent reads, got 0s:%d 1s:%d\n", count_0, count_1); ret = -1; } current = lseek(fd, 0, SEEK_CUR); if (current != expected) { fprintf(stderr, "f_pos incorrect, expected %ld have %ld\n", (long) expected, (long) current); ret = -1; } if (ret) return ret; } return 0; } static int test_write(struct io_uring *ring, bool async, int blocksize) { int ret, fd, i; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; bool fail = false; loff_t current; char data[blocksize+1]; char readbuff[QUEUE_SIZE*blocksize+1]; fd = open(".test_fpos_write", O_RDWR | O_CREAT, 0644); unlink(".test_fpos_write"); assert(fd >= 0); for (i = 0; i < blocksize; i++) data[i] = 'A' + i; data[blocksize] = '\0'; for (i = 0; i < QUEUE_SIZE; ++i) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "no sqe\n"); return -1; } io_uring_prep_write(sqe, fd, data + (i % blocksize), 1, -1); sqe->user_data = 1; if (async) sqe->flags |= IOSQE_ASYNC; if (i != QUEUE_SIZE - 1) sqe->flags |= IOSQE_IO_LINK; } ret = io_uring_submit_and_wait(ring, QUEUE_SIZE); if (ret != QUEUE_SIZE) { fprintf(stderr, "submit failed: %d\n", ret); return 1; } for (i = 0; i < QUEUE_SIZE; ++i) { int res; ret = io_uring_peek_cqe(ring, &cqe); res = cqe->res; if (ret) { fprintf(stderr, "peek failed: %d\n", ret); return ret; } io_uring_cqe_seen(ring, cqe); if (!fail && res != 1) { fprintf(stderr, "bad result %d\n", res); fail = true; } } current = lseek(fd, 0, SEEK_CUR); if (current != QUEUE_SIZE) { fprintf(stderr, "f_pos incorrect, expected %ld have %d\n", (long) current, QUEUE_SIZE); fail = true; } current = lseek(fd, 0, SEEK_SET); if (current != 0) { perror("seek to start"); return -1; } ret = read(fd, readbuff, QUEUE_SIZE); if (ret != QUEUE_SIZE) { fprintf(stderr, "did not write enough: %d\n", 
ret); return -1; } i = 0; while (i < QUEUE_SIZE - blocksize) { if (strncmp(readbuff + i, data, blocksize)) { char bad[QUEUE_SIZE+1]; memcpy(bad, readbuff + i, blocksize); bad[blocksize] = '\0'; fprintf(stderr, "unexpected data %s\n", bad); fail = true; } i += blocksize; } return fail ? -1 : 0; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(QUEUE_SIZE, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } for (int test = 0; test < 8; test++) { int async = test & 0x01; int write = test & 0x02; int blocksize = test & 0x04 ? 1 : 7; ret = write ? test_write(&ring, !!async, blocksize) : test_read(&ring, !!async, blocksize); if (ret) { fprintf(stderr, "failed %s async=%d blocksize=%d\n", write ? "write" : "read", async, blocksize); return -1; } } return T_EXIT_PASS; } liburing-2.6/test/fsnotify.c000066400000000000000000000043401461424365000161660ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test fsnotify access off O_DIRECT read */ #include "helpers.h" #ifdef CONFIG_HAVE_FANOTIFY #include #include #include #include #include #include #include #include "liburing.h" int main(int argc, char *argv[]) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int fan, ret, fd, err; char fname[64], *f; struct stat sb; void *buf; fan = fanotify_init(FAN_CLASS_NOTIF|FAN_CLASS_CONTENT, 0); if (fan < 0) { if (errno == ENOSYS) return T_EXIT_SKIP; if (geteuid()) return T_EXIT_SKIP; perror("fanotify_init"); return T_EXIT_FAIL; } err = T_EXIT_FAIL; if (argc > 1) { f = argv[1]; fd = open(argv[1], O_RDONLY | O_DIRECT); if (fd < 0 && errno == EINVAL) return T_EXIT_SKIP; } else { sprintf(fname, ".fsnotify.%d", getpid()); f = fname; t_create_file(fname, 8192); fd = open(fname, O_RDONLY | O_DIRECT); if (fd < 0 && errno == EINVAL) { unlink(fname); return T_EXIT_SKIP; } } if (fd < 0) { perror("open"); goto out; } if (fstat(fd, &sb) < 0) { perror("fstat"); goto out; } if ((sb.st_mode & S_IFMT) != S_IFREG) { err = T_EXIT_SKIP; close(fd); goto out; } ret = fanotify_mark(fan, FAN_MARK_ADD, FAN_ACCESS|FAN_MODIFY, fd, NULL); if (ret < 0) { perror("fanotify_mark"); goto out; } if (fork()) { int wstat; io_uring_queue_init(1, &ring, 0); if (posix_memalign(&buf, 4096, 4096)) goto out; sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fd, buf, 4096, 0); io_uring_submit(&ring); ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_ret=%d\n", ret); goto out; } wait(&wstat); if (!WEXITSTATUS(wstat)) err = T_EXIT_PASS; } else { struct fanotify_event_metadata m; int fret; fret = read(fan, &m, sizeof(m)); if (fret < 0) perror("fanotify read"); /* fail if mask isn't right or pid indicates non-task context */ else if (!(m.mask & 1) || !m.pid) exit(1); exit(0); } out: if (f == fname) unlink(fname); return err; } #else /* #ifdef CONFIG_HAVE_FANOTIFY */ int main(void) { return T_EXIT_SKIP; } #endif /* #ifdef CONFIG_HAVE_FANOTIFY */ liburing-2.6/test/fsync.c000066400000000000000000000077771461424365000154700ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring fsync handling * */ #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static int test_single_fsync(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; char buf[32]; int fd, ret; sprintf(buf, "./XXXXXX"); fd = mkstemp(buf); if (fd < 0) { perror("open"); return 1; } sqe = 
io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_fsync(sqe, fd, 0); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } io_uring_cqe_seen(ring, cqe); unlink(buf); return 0; err: unlink(buf); return 1; } static int test_barrier_fsync(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct iovec iovecs[4]; int i, fd, ret; off_t off; fd = open("fsync-testfile", O_WRONLY | O_CREAT, 0644); if (fd < 0) { perror("open"); return 1; } unlink("fsync-testfile"); for (i = 0; i < ARRAY_SIZE(iovecs); i++) { iovecs[i].iov_base = t_malloc(4096); iovecs[i].iov_len = 4096; } off = 0; for (i = 0; i < 4; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_writev(sqe, fd, &iovecs[i], 1, off); sqe->user_data = 0; off += 4096; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_fsync(sqe, fd, IORING_FSYNC_DATASYNC); sqe->user_data = 1; io_uring_sqe_set_flags(sqe, IOSQE_IO_DRAIN); ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } else if (ret < 5) { fprintf(stderr, "Submitted only %d\n", ret); goto err; } for (i = 0; i < 5; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } /* kernel doesn't support IOSQE_IO_DRAIN */ if (cqe->res == -EINVAL) break; if (i <= 3) { if (cqe->user_data) { fprintf(stderr, "Got fsync early?\n"); goto err; } } else { if (!cqe->user_data) { fprintf(stderr, "Got write late?\n"); goto err; } } io_uring_cqe_seen(ring, cqe); } ret = 0; goto out; err: ret = 1; out: for (i = 0; i < ARRAY_SIZE(iovecs); i++) free(iovecs[i].iov_base); return ret; } #define FILE_SIZE 1024 static int test_sync_file_range(struct io_uring *ring) { int ret, fd, save_errno; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; t_create_file(".sync_file_range", FILE_SIZE); fd = open(".sync_file_range", O_RDWR); save_errno = errno; unlink(".sync_file_range"); errno = save_errno; if (fd < 0) { perror("file open"); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); return 1; } io_uring_prep_sync_file_range(sqe, fd, 0, 0, 0); sqe->user_data = 1; ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "submit failed: %d\n", ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe failed: %d\n", ret); return 1; } if (cqe->res) { fprintf(stderr, "sfr failed: %d\n", cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); return 0; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } ret = test_single_fsync(&ring); if (ret) { fprintf(stderr, "test_single_fsync failed\n"); return ret; } ret = test_barrier_fsync(&ring); if (ret) { fprintf(stderr, "test_barrier_fsync failed\n"); return ret; } ret = test_sync_file_range(&ring); if (ret) { fprintf(stderr, "test_sync_file_range failed\n"); return ret; } return T_EXIT_PASS; } liburing-2.6/test/futex.c000066400000000000000000000261531461424365000154660ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: exercise futex wait/wake/waitv * */ #include #include 
#include #include #include #include #include "liburing.h" #include "helpers.h" #define LOOPS 500 #define NFUTEX 8 #ifndef FUTEX2_SIZE_U8 #define FUTEX2_SIZE_U8 0x00 #define FUTEX2_SIZE_U16 0x01 #define FUTEX2_SIZE_U32 0x02 #define FUTEX2_SIZE_U64 0x03 #define FUTEX2_NUMA 0x04 /* 0x08 */ /* 0x10 */ /* 0x20 */ /* 0x40 */ #define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG #define FUTEX2_SIZE_MASK 0x03 #endif static int no_futex; static void *fwake(void *data) { unsigned int *futex = data; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "queue init: %d\n", ret); return NULL; } *futex = 1; sqe = io_uring_get_sqe(&ring); io_uring_prep_futex_wake(sqe, futex, 1, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0); sqe->user_data = 3; io_uring_submit(&ring); ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return NULL; } io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return NULL; } static int __test(struct io_uring *ring, int vectored, int async, int async_cancel) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct futex_waitv fw[NFUTEX]; unsigned int *futex; pthread_t threads[NFUTEX]; void *tret; int ret, i, nfutex; nfutex = NFUTEX; if (!vectored) nfutex = 1; futex = calloc(nfutex, sizeof(*futex)); for (i = 0; i < nfutex; i++) { fw[i].val = 0; fw[i].uaddr = (unsigned long) &futex[i]; fw[i].flags = FUTEX2_SIZE_U32; fw[i].__reserved = 0; } sqe = io_uring_get_sqe(ring); if (vectored) io_uring_prep_futex_waitv(sqe, fw, nfutex, 0); else io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0); if (async) sqe->flags |= IOSQE_ASYNC; sqe->user_data = 1; io_uring_submit(ring); for (i = 0; i < nfutex; i++) pthread_create(&threads[i], NULL, fwake, &futex[i]); sqe = io_uring_get_sqe(ring); io_uring_prep_cancel64(sqe, 1, 0); if (async_cancel) sqe->flags |= IOSQE_ASYNC; sqe->user_data = 2; io_uring_submit(ring); for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "parent wait %d\n", ret); return 1; } if (cqe->res == -EINVAL || cqe->res == -EOPNOTSUPP) { no_futex = 1; return 0; } io_uring_cqe_seen(ring, cqe); } ret = io_uring_peek_cqe(ring, &cqe); if (!ret) { fprintf(stderr, "peek found cqe!\n"); return 1; } for (i = 0; i < nfutex; i++) pthread_join(threads[i], &tret); return 0; } static int test(int flags, int vectored) { struct io_uring ring; int ret, i; ret = io_uring_queue_init(8, &ring, flags); if (ret) return ret; for (i = 0; i < LOOPS; i++) { int async_cancel = (!i % 2); int async_wait = !(i % 3); ret = __test(&ring, vectored, async_wait, async_cancel); if (ret) { fprintf(stderr, "flags=%x, failed=%d\n", flags, i); break; } if (no_futex) break; } io_uring_queue_exit(&ring); return ret; } static int test_order(int vectored, int async) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct futex_waitv fw; struct io_uring ring; unsigned int *futex; int ret, i; ret = io_uring_queue_init(8, &ring, 0); if (ret) return ret; futex = malloc(sizeof(*futex)); *futex = 0; fw.val = 0; fw.uaddr = (unsigned long) futex; fw.flags = FUTEX2_SIZE_U32; fw.__reserved = 0; /* * Submit two futex waits */ sqe = io_uring_get_sqe(&ring); if (!vectored) io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0); else io_uring_prep_futex_waitv(sqe, &fw, 1, 0); sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); if (!vectored) io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 
FUTEX2_SIZE_U32, 0); else io_uring_prep_futex_waitv(sqe, &fw, 1, 0); sqe->user_data = 2; io_uring_submit(&ring); /* * Now submit wake for just one futex */ *futex = 1; sqe = io_uring_get_sqe(&ring); io_uring_prep_futex_wake(sqe, futex, 1, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0); sqe->user_data = 100; if (async) sqe->flags |= IOSQE_ASYNC; io_uring_submit(&ring); /* * We expect to find completions for the first futex wait, and * the futex wake. We should not see the last futex wait. */ for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait %d\n", ret); return 1; } if (cqe->user_data == 1 || cqe->user_data == 100) { io_uring_cqe_seen(&ring, cqe); continue; } fprintf(stderr, "unexpected cqe %lu, res %d\n", (unsigned long) cqe->user_data, cqe->res); return 1; } ret = io_uring_peek_cqe(&ring, &cqe); if (ret != -EAGAIN) { fprintf(stderr, "Unexpected cqe available: %d\n", cqe->res); return 1; } io_uring_queue_exit(&ring); return 0; } static int test_multi_wake(int vectored) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct futex_waitv fw; struct io_uring ring; unsigned int *futex; int ret, i; ret = io_uring_queue_init(8, &ring, 0); if (ret) return ret; futex = malloc(sizeof(*futex)); *futex = 0; fw.val = 0; fw.uaddr = (unsigned long) futex; fw.flags = FUTEX2_SIZE_U32; fw.__reserved = 0; /* * Submit two futex waits */ sqe = io_uring_get_sqe(&ring); if (!vectored) io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0); else io_uring_prep_futex_waitv(sqe, &fw, 1, 0); sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); if (!vectored) io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0); else io_uring_prep_futex_waitv(sqe, &fw, 1, 0); sqe->user_data = 2; io_uring_submit(&ring); /* * Now submit wake for both futexes */ *futex = 1; sqe = io_uring_get_sqe(&ring); io_uring_prep_futex_wake(sqe, futex, 2, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0); sqe->user_data = 100; io_uring_submit(&ring); /* * We expect to find completions for the both futex waits, and * the futex wake. 
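 * Unlike test_order() above, io_uring_prep_futex_wake() is armed with
 * val = 2 here, i.e. up to two waiters may be woken, so neither wait
 * is expected to be left pending.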
*/ for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "cqe error %d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); } ret = io_uring_peek_cqe(&ring, &cqe); if (!ret) { fprintf(stderr, "peek found cqe!\n"); return 1; } io_uring_queue_exit(&ring); return 0; } /* * Test that waking 0 futexes returns 0 */ static int test_wake_zero(void) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; unsigned int *futex; int ret; ret = io_uring_queue_init(8, &ring, 0); if (ret) return ret; futex = malloc(sizeof(*futex)); *futex = 0; sqe = io_uring_get_sqe(&ring); sqe->user_data = 1; io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0); io_uring_submit(&ring); sqe = io_uring_get_sqe(&ring); sqe->user_data = 2; io_uring_prep_futex_wake(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, FUTEX2_SIZE_U32, 0); io_uring_submit(&ring); ret = io_uring_wait_cqe(&ring, &cqe); /* * Should get zero res and it should be the wake */ if (cqe->res || cqe->user_data != 2) { fprintf(stderr, "cqe res %d, data %ld\n", cqe->res, (long) cqe->user_data); return 1; } io_uring_cqe_seen(&ring, cqe); /* * Should not have the wait complete */ ret = io_uring_peek_cqe(&ring, &cqe); if (!ret) { fprintf(stderr, "peek found cqe!\n"); return 1; } io_uring_queue_exit(&ring); return 0; } /* * Test invalid wait/wake/waitv flags */ static int test_invalid(void) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct futex_waitv fw; struct io_uring ring; unsigned int *futex; int ret; ret = io_uring_queue_init(8, &ring, 0); if (ret) return ret; futex = malloc(sizeof(*futex)); *futex = 0; sqe = io_uring_get_sqe(&ring); sqe->user_data = 1; io_uring_prep_futex_wait(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 0x1000, 0); io_uring_submit(&ring); ret = io_uring_wait_cqe(&ring, &cqe); /* * Should get zero res and it should be the wake */ if (cqe->res != -EINVAL) { fprintf(stderr, "wait cqe res %d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); sqe = io_uring_get_sqe(&ring); sqe->user_data = 1; io_uring_prep_futex_wake(sqe, futex, 0, FUTEX_BITSET_MATCH_ANY, 0x1000, 0); io_uring_submit(&ring); ret = io_uring_wait_cqe(&ring, &cqe); /* * Should get zero res and it should be the wake */ if (cqe->res != -EINVAL) { fprintf(stderr, "wake cqe res %d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); fw.val = 0; fw.uaddr = (unsigned long) futex; fw.flags = FUTEX2_SIZE_U32 | 0x1000; fw.__reserved = 0; sqe = io_uring_get_sqe(&ring); sqe->user_data = 1; io_uring_prep_futex_waitv(sqe, &fw, 1, 0); io_uring_submit(&ring); ret = io_uring_wait_cqe(&ring, &cqe); /* * Should get zero res and it should be the wake */ if (cqe->res != -EINVAL) { fprintf(stderr, "waitv cqe res %d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test(0, 0); if (ret) { fprintf(stderr, "test 0 0 failed\n"); return T_EXIT_FAIL; } if (no_futex) return T_EXIT_SKIP; ret = test(0, 1); if (ret) { fprintf(stderr, "test 0 1 failed\n"); return T_EXIT_FAIL; } ret = test_wake_zero(); if (ret) { fprintf(stderr, "wake 0 failed\n"); return T_EXIT_FAIL; } ret = test_invalid(); if (ret) { fprintf(stderr, "test invalid failed\n"); return T_EXIT_FAIL; } ret = test(IORING_SETUP_SQPOLL, 0); if (ret) { fprintf(stderr, "test sqpoll 0 failed\n"); return T_EXIT_FAIL; } ret = test(IORING_SETUP_SQPOLL, 1); if (ret) { 
fprintf(stderr, "test sqpoll 1 failed\n"); return T_EXIT_FAIL; } ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 0); if (ret) { fprintf(stderr, "test single coop 0 failed\n"); return T_EXIT_FAIL; } ret = test(IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN, 1); if (ret) { fprintf(stderr, "test single coop 1 failed\n"); return T_EXIT_FAIL; } ret = test(IORING_SETUP_COOP_TASKRUN, 0); if (ret) { fprintf(stderr, "test taskrun 0 failed\n"); return T_EXIT_FAIL; } ret = test(IORING_SETUP_COOP_TASKRUN, 1); if (ret) { fprintf(stderr, "test taskrun 1 failed\n"); return T_EXIT_FAIL; } ret = test_order(0, 0); if (ret) { fprintf(stderr, "test_order 0 0 failed\n"); return T_EXIT_FAIL; } ret = test_order(1, 0); if (ret) { fprintf(stderr, "test_order 1 0 failed\n"); return T_EXIT_FAIL; } ret = test_order(0, 1); if (ret) { fprintf(stderr, "test_order 0 1 failed\n"); return T_EXIT_FAIL; } ret = test_order(1, 1); if (ret) { fprintf(stderr, "test_order 1 1 failed\n"); return T_EXIT_FAIL; } ret = test_multi_wake(0); if (ret) { fprintf(stderr, "multi_wake 0 failed\n"); return T_EXIT_FAIL; } ret = test_multi_wake(1); if (ret) { fprintf(stderr, "multi_wake 1 failed\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/hardlink.c000066400000000000000000000076331461424365000161310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring linkat handling */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int do_linkat(struct io_uring *ring, int olddirfd, const char *oldname, const char *newname, int flags) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); return 1; } io_uring_prep_linkat(sqe, olddirfd, oldname, AT_FDCWD, newname, flags); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "submit failed: %d\n", ret); return 1; } ret = io_uring_wait_cqes(ring, &cqe, 1, 0, 0); if (ret) { fprintf(stderr, "wait_cqe failed: %d\n", ret); return 1; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; } static int files_linked_ok(const char* fn1, const char *fn2) { struct stat s1, s2; if (stat(fn1, &s1)) { fprintf(stderr, "stat(%s): %s\n", fn1, strerror(errno)); return 0; } if (stat(fn2, &s2)) { fprintf(stderr, "stat(%s): %s\n", fn2, strerror(errno)); return 0; } if (s1.st_dev != s2.st_dev || s1.st_ino != s2.st_ino) { fprintf(stderr, "linked files have different device / inode numbers\n"); return 0; } if (s1.st_nlink != 2 || s2.st_nlink != 2) { fprintf(stderr, "linked files have unexpected links count\n"); return 0; } return 1; } int main(int argc, char *argv[]) { static const char target[] = "io_uring-linkat-test-target"; static const char emptyname[] = "io_uring-linkat-test-empty"; static const char linkname[] = "io_uring-linkat-test-link"; static const char symlinkname[] = "io_uring-linkat-test-symlink"; struct io_uring ring; int ret, fd, exit_status = T_EXIT_FAIL; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return ret; } ret = fd = open(target, O_CREAT | O_RDWR | O_EXCL, 0600); if (ret < 0) { perror("open"); goto out; } if (write(fd, "linktest", 8) != 8) { close(fd); goto out; } if(geteuid()) { fprintf(stdout, "not root, skipping AT_EMPTY_PATH test\n"); } else { ret = do_linkat(&ring, fd, "", emptyname, AT_EMPTY_PATH); if (ret < 0) { if (ret == -EBADF || ret == -EINVAL) { fprintf(stdout, 
"linkat not supported, skipping\n"); exit_status = T_EXIT_SKIP; goto out; } fprintf(stderr, "linkat: %s\n", strerror(-ret)); goto out; } else if (ret) { goto out; } if (!files_linked_ok(emptyname, target)) goto out; unlinkat(AT_FDCWD, emptyname, 0); } close(fd); ret = symlink(target, symlinkname); if (ret < 0) { perror("open"); goto out; } ret = do_linkat(&ring, AT_FDCWD, target, linkname, 0); if (ret < 0) { if (ret == -EBADF || ret == -EINVAL) { fprintf(stdout, "linkat not supported, skipping\n"); exit_status = T_EXIT_SKIP; goto out; } fprintf(stderr, "linkat: %s\n", strerror(-ret)); goto out; } else if (ret) { goto out; } if (!files_linked_ok(linkname, target)) goto out; unlinkat(AT_FDCWD, linkname, 0); ret = do_linkat(&ring, AT_FDCWD, symlinkname, linkname, AT_SYMLINK_FOLLOW); if (ret < 0) { fprintf(stderr, "linkat: %s\n", strerror(-ret)); goto out; } else if (ret) { goto out; } if (!files_linked_ok(symlinkname, target)) goto out; ret = do_linkat(&ring, AT_FDCWD, target, linkname, 0); if (ret != -EEXIST) { fprintf(stderr, "test_linkat linkname already exists failed: %d\n", ret); goto out; } ret = do_linkat(&ring, AT_FDCWD, target, "surely/this/does/not/exist", 0); if (ret != -ENOENT) { fprintf(stderr, "test_linkat no parent failed: %d\n", ret); goto out; } exit_status = T_EXIT_PASS; out: unlinkat(AT_FDCWD, symlinkname, 0); unlinkat(AT_FDCWD, linkname, 0); unlinkat(AT_FDCWD, emptyname, 0); unlinkat(AT_FDCWD, target, 0); io_uring_queue_exit(&ring); return exit_status; } liburing-2.6/test/helpers.c000066400000000000000000000155571461424365000160030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Helpers for tests. */ #include #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" /* * Helper for allocating memory in tests. */ void *t_malloc(size_t size) { void *ret; ret = malloc(size); assert(ret); return ret; } /* * Helper for binding socket to an ephemeral port. * The port number to be bound is returned in @addr->sin_port. */ int t_bind_ephemeral_port(int fd, struct sockaddr_in *addr) { socklen_t addrlen; int ret; addr->sin_port = 0; if (bind(fd, (struct sockaddr *)addr, sizeof(*addr))) return -errno; addrlen = sizeof(*addr); ret = getsockname(fd, (struct sockaddr *)addr, &addrlen); assert(!ret); assert(addr->sin_port != 0); return 0; } /* * Helper for allocating size bytes aligned on a boundary. */ void t_posix_memalign(void **memptr, size_t alignment, size_t size) { int ret; ret = posix_memalign(memptr, alignment, size); assert(!ret); } /* * Helper for allocating space for an array of nmemb elements * with size bytes for each element. */ void *t_calloc(size_t nmemb, size_t size) { void *ret; ret = calloc(nmemb, size); assert(ret); return ret; } /* * Helper for creating file and write @size byte buf with 0xaa value in the file. */ static void __t_create_file(const char *file, size_t size, char pattern) { ssize_t ret; char *buf; int fd; buf = t_malloc(size); memset(buf, pattern, size); fd = open(file, O_WRONLY | O_CREAT, 0644); assert(fd >= 0); ret = write(fd, buf, size); fsync(fd); close(fd); free(buf); assert(ret == size); } void t_create_file(const char *file, size_t size) { __t_create_file(file, size, 0xaa); } void t_create_file_pattern(const char *file, size_t size, char pattern) { __t_create_file(file, size, pattern); } /* * Helper for creating @buf_num number of iovec * with @buf_size bytes buffer of each iovec. 
*/ struct iovec *t_create_buffers(size_t buf_num, size_t buf_size) { struct iovec *vecs; int i; vecs = t_malloc(buf_num * sizeof(struct iovec)); for (i = 0; i < buf_num; i++) { t_posix_memalign(&vecs[i].iov_base, buf_size, buf_size); vecs[i].iov_len = buf_size; } return vecs; } /* * Helper for setting up an io_uring instance, skipping if the given user isn't * allowed to. */ enum t_setup_ret t_create_ring_params(int depth, struct io_uring *ring, struct io_uring_params *p) { int ret; ret = io_uring_queue_init_params(depth, ring, p); if (!ret) return T_SETUP_OK; if ((p->flags & IORING_SETUP_SQPOLL) && ret == -EPERM && geteuid()) { fprintf(stdout, "SQPOLL skipped for regular user\n"); return T_SETUP_SKIP; } if (ret != -EINVAL) fprintf(stderr, "queue_init: %s\n", strerror(-ret)); return ret; } enum t_setup_ret t_create_ring(int depth, struct io_uring *ring, unsigned int flags) { struct io_uring_params p = { }; p.flags = flags; return t_create_ring_params(depth, ring, &p); } enum t_setup_ret t_register_buffers(struct io_uring *ring, const struct iovec *iovecs, unsigned nr_iovecs) { int ret; ret = io_uring_register_buffers(ring, iovecs, nr_iovecs); if (!ret) return T_SETUP_OK; if ((ret == -EPERM || ret == -ENOMEM) && geteuid()) { fprintf(stdout, "too large non-root buffer registration, skip\n"); return T_SETUP_SKIP; } fprintf(stderr, "buffer register failed: %s\n", strerror(-ret)); return ret; } int t_create_socket_pair(int fd[2], bool stream) { int ret; int type = stream ? SOCK_STREAM : SOCK_DGRAM; int val; struct sockaddr_in serv_addr; struct sockaddr *paddr; socklen_t paddrlen; type |= SOCK_CLOEXEC; fd[0] = socket(AF_INET, type, 0); if (fd[0] < 0) return errno; fd[1] = socket(AF_INET, type, 0); if (fd[1] < 0) { ret = errno; close(fd[0]); return ret; } val = 1; if (setsockopt(fd[0], SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) goto errno_cleanup; memset(&serv_addr, 0, sizeof(serv_addr)); serv_addr.sin_family = AF_INET; serv_addr.sin_port = 0; inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr); paddr = (struct sockaddr *)&serv_addr; paddrlen = sizeof(serv_addr); if (bind(fd[0], paddr, paddrlen)) { fprintf(stderr, "bind failed\n"); goto errno_cleanup; } if (stream && listen(fd[0], 16)) { fprintf(stderr, "listen failed\n"); goto errno_cleanup; } if (getsockname(fd[0], (struct sockaddr *)&serv_addr, (socklen_t *)&paddrlen)) { fprintf(stderr, "getsockname failed\n"); goto errno_cleanup; } inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr); if (connect(fd[1], (struct sockaddr *)&serv_addr, paddrlen)) { fprintf(stderr, "connect failed\n"); goto errno_cleanup; } if (!stream) { /* connect the other udp side */ if (getsockname(fd[1], (struct sockaddr *)&serv_addr, (socklen_t *)&paddrlen)) { fprintf(stderr, "getsockname failed\n"); goto errno_cleanup; } inet_pton(AF_INET, "127.0.0.1", &serv_addr.sin_addr); if (connect(fd[0], (struct sockaddr *)&serv_addr, paddrlen)) { fprintf(stderr, "connect failed\n"); goto errno_cleanup; } return 0; } /* for stream case we must accept and cleanup the listen socket */ ret = accept(fd[0], NULL, NULL); if (ret < 0) goto errno_cleanup; close(fd[0]); fd[0] = ret; return 0; errno_cleanup: ret = errno; close(fd[0]); close(fd[1]); return ret; } bool t_probe_defer_taskrun(void) { struct io_uring ring; int ret; ret = io_uring_queue_init(1, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN); if (ret < 0) return false; io_uring_queue_exit(&ring); return true; } /* * Sync internal state with kernel ring state on the SQ side. 
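 * The cached sqe_tail is published to the shared ring tail so the kernel
 * can see newly written SQEs; under IORING_SETUP_SQPOLL this store uses
 * release semantics because the SQ thread reads the tail concurrently.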
Returns the * number of pending items in the SQ ring, for the shared ring. */ unsigned __io_uring_flush_sq(struct io_uring *ring) { struct io_uring_sq *sq = &ring->sq; unsigned tail = sq->sqe_tail; if (sq->sqe_head != tail) { sq->sqe_head = tail; /* * Ensure kernel sees the SQE updates before the tail update. */ if (!(ring->flags & IORING_SETUP_SQPOLL)) *sq->ktail = tail; else io_uring_smp_store_release(sq->ktail, tail); } /* * This load needs to be atomic, since sq->khead is written concurrently * by the kernel, but it doesn't need to be load_acquire, since the * kernel doesn't store to the submission queue; it advances khead just * to indicate that it's finished reading the submission queue entries * so they're available for us to write to. */ return tail - IO_URING_READ_ONCE(*sq->khead); } /* * Implementation of error(3), prints an error message and exits. */ void t_error(int status, int errnum, const char *format, ...) { va_list args; va_start(args, format); vfprintf(stderr, format, args); if (errnum) fprintf(stderr, ": %s", strerror(errnum)); fprintf(stderr, "\n"); va_end(args); exit(status); } liburing-2.6/test/helpers.h000066400000000000000000000045161461424365000160010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Helpers for tests. */ #ifndef LIBURING_HELPERS_H #define LIBURING_HELPERS_H #ifdef __cplusplus extern "C" { #endif #include "liburing.h" #include "../src/setup.h" #include enum t_setup_ret { T_SETUP_OK = 0, T_SETUP_SKIP, }; enum t_test_result { T_EXIT_PASS = 0, T_EXIT_FAIL = 1, T_EXIT_SKIP = 77, }; /* * Helper for binding socket to an ephemeral port. * The port number to be bound is returned in @addr->sin_port. */ int t_bind_ephemeral_port(int fd, struct sockaddr_in *addr); /* * Helper for allocating memory in tests. */ void *t_malloc(size_t size); /* * Helper for allocating size bytes aligned on a boundary. */ void t_posix_memalign(void **memptr, size_t alignment, size_t size); /* * Helper for allocating space for an array of nmemb elements * with size bytes for each element. */ void *t_calloc(size_t nmemb, size_t size); /* * Helper for creating file and write @size byte buf with 0xaa value in the file. */ void t_create_file(const char *file, size_t size); /* * Helper for creating file and write @size byte buf with @pattern value in * the file. */ void t_create_file_pattern(const char *file, size_t size, char pattern); /* * Helper for creating @buf_num number of iovec * with @buf_size bytes buffer of each iovec. */ struct iovec *t_create_buffers(size_t buf_num, size_t buf_size); /* * Helper for creating connected socket pairs */ int t_create_socket_pair(int fd[2], bool stream); /* * Helper for setting up a ring and checking for user privs */ enum t_setup_ret t_create_ring_params(int depth, struct io_uring *ring, struct io_uring_params *p); enum t_setup_ret t_create_ring(int depth, struct io_uring *ring, unsigned int flags); enum t_setup_ret t_register_buffers(struct io_uring *ring, const struct iovec *iovecs, unsigned nr_iovecs); bool t_probe_defer_taskrun(void); unsigned __io_uring_flush_sq(struct io_uring *ring); static inline int t_io_uring_init_sqarray(unsigned entries, struct io_uring *ring, struct io_uring_params *p) { int ret; ret = __io_uring_queue_init_params(entries, ring, p, NULL, 0); return ret >= 0 ? 
0 : ret; } #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) void t_error(int status, int errnum, const char *format, ...); #ifdef __cplusplus } #endif #endif liburing-2.6/test/io-cancel.c000066400000000000000000000271621461424365000161660ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Basic IO cancel test */ #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE (128 * 1024) #define BS 4096 #define BUFFERS (FILE_SIZE / BS) static struct iovec *vecs; static unsigned long long utime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000000; return sec + usec; } static unsigned long long utime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return utime_since(tv, &end); } static int start_io(struct io_uring *ring, int fd, int do_write) { struct io_uring_sqe *sqe; int i, ret; for (i = 0; i < BUFFERS; i++) { off_t offset; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } offset = BS * (rand() % BUFFERS); if (do_write) { io_uring_prep_writev(sqe, fd, &vecs[i], 1, offset); } else { io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); } sqe->user_data = i + 1; } ret = io_uring_submit(ring); if (ret != BUFFERS) { fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); goto err; } return 0; err: return 1; } static int wait_io(struct io_uring *ring, unsigned nr_io, int do_partial) { struct io_uring_cqe *cqe; int i, ret; for (i = 0; i < nr_io; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); goto err; } if (do_partial && cqe->user_data) { if (!(cqe->user_data & 1)) { if (cqe->res != BS) { fprintf(stderr, "IO %d wasn't cancelled but got error %d\n", (unsigned) cqe->user_data, cqe->res); goto err; } } } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } static int do_io(struct io_uring *ring, int fd, int do_write) { if (start_io(ring, fd, do_write)) return 1; if (wait_io(ring, BUFFERS, 0)) return 1; return 0; } static int start_cancel(struct io_uring *ring, int do_partial, int async_cancel) { struct io_uring_sqe *sqe; int i, ret, submitted = 0; for (i = 0; i < BUFFERS; i++) { if (do_partial && (i & 1)) continue; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } io_uring_prep_cancel64(sqe, i + 1, 0); if (async_cancel) sqe->flags |= IOSQE_ASYNC; sqe->user_data = 0; submitted++; } ret = io_uring_submit(ring); if (ret != submitted) { fprintf(stderr, "submit got %d, wanted %d\n", ret, submitted); goto err; } return 0; err: return 1; } /* * Test cancels. If 'do_partial' is set, then we only attempt to cancel half of * the submitted IO. This is done to verify that cancelling one piece of IO doesn't * impact others. 
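 *
 * In the partial case only the even buffer indexes (odd user_data, since
 * user_data is i + 1) get an io_uring_prep_cancel64() issued against
 * them, and wait_io() then checks that every even-user_data completion
 * still reports a full BS-byte transfer.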
*/ static int test_io_cancel(const char *file, int do_write, int do_partial, int async_cancel) { struct io_uring ring; struct timeval start_tv; unsigned long usecs; unsigned to_wait; int fd, ret; fd = open(file, O_RDWR | O_DIRECT); if (fd < 0) { if (errno == EINVAL) return T_EXIT_SKIP; perror("file open"); goto err; } ret = io_uring_queue_init(4 * BUFFERS, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); goto err; } if (do_io(&ring, fd, do_write)) goto err; gettimeofday(&start_tv, NULL); if (do_io(&ring, fd, do_write)) goto err; usecs = utime_since_now(&start_tv); if (start_io(&ring, fd, do_write)) goto err; /* sleep for 1/3 of the total time, to allow some to start/complete */ usleep(usecs / 3); if (start_cancel(&ring, do_partial, async_cancel)) goto err; to_wait = BUFFERS; if (do_partial) to_wait += BUFFERS / 2; else to_wait += BUFFERS; if (wait_io(&ring, to_wait, do_partial)) goto err; io_uring_queue_exit(&ring); close(fd); return 0; err: if (fd != -1) close(fd); return 1; } static int test_dont_cancel_another_ring(void) { struct io_uring ring1, ring2; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; char buffer[128]; int ret, fds[2]; struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 100000000, }; ret = io_uring_queue_init(8, &ring1, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } ret = io_uring_queue_init(8, &ring2, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(&ring1); if (!sqe) { fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); return 1; } io_uring_prep_read(sqe, fds[0], buffer, 10, 0); sqe->flags |= IOSQE_ASYNC; sqe->user_data = 1; ret = io_uring_submit(&ring1); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } /* make sure it doesn't cancel requests of the other ctx */ sqe = io_uring_get_sqe(&ring2); if (!sqe) { fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); return 1; } io_uring_prep_cancel64(sqe, 1, 0); sqe->user_data = 2; ret = io_uring_submit(&ring2); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } ret = io_uring_wait_cqe(&ring2, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } if (cqe->user_data != 2 || cqe->res != -ENOENT) { fprintf(stderr, "error: cqe %i: res=%i, but expected -ENOENT\n", (int)cqe->user_data, (int)cqe->res); return 1; } io_uring_cqe_seen(&ring2, cqe); ret = io_uring_wait_cqe_timeout(&ring1, &cqe, &ts); if (ret != -ETIME) { fprintf(stderr, "read got cancelled or wait failed\n"); return 1; } io_uring_cqe_seen(&ring1, cqe); close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring1); io_uring_queue_exit(&ring2); return 0; } static int test_cancel_req_across_fork(void) { struct io_uring ring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; char buffer[128]; int ret, i, fds[2]; pid_t p; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); return 1; } io_uring_prep_read(sqe, fds[0], buffer, 10, 0); sqe->flags |= IOSQE_ASYNC; sqe->user_data = 1; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } p = fork(); if (p == -1) { fprintf(stderr, "fork() failed\n"); return 1; } if (p == 0) { sqe = io_uring_get_sqe(&ring); if 
(!sqe) { fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); return 1; } io_uring_prep_cancel64(sqe, 1, 0); sqe->user_data = 2; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } for (i = 0; i < 2; ++i) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } switch (cqe->user_data) { case 1: if (cqe->res != -EINTR && cqe->res != -ECANCELED) { fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res); exit(1); } break; case 2: if (cqe->res != -EALREADY && cqe->res) { fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res); exit(1); } break; default: fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res); exit(1); } io_uring_cqe_seen(&ring, cqe); } exit(0); } else { int wstatus; pid_t childpid; do { childpid = waitpid(p, &wstatus, 0); } while (childpid == (pid_t)-1 && errno == EINTR); if (childpid == (pid_t)-1) { perror("waitpid()"); return 1; } if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus)) { fprintf(stderr, "child failed %i\n", WEXITSTATUS(wstatus)); return 1; } } close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring); return 0; } static int test_cancel_inflight_exit(void) { struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0, }; struct io_uring ring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; pid_t p; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } p = fork(); if (p == -1) { fprintf(stderr, "fork() failed\n"); return 1; } if (p == 0) { sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN); sqe->user_data = 1; sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(&ring); io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 2; sqe = io_uring_get_sqe(&ring); io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 3; ret = io_uring_submit(&ring); if (ret != 3) { fprintf(stderr, "io_uring_submit() failed %s, ret %i\n", __FUNCTION__, ret); exit(1); } exit(0); } else { int wstatus; if (waitpid(p, &wstatus, 0) == (pid_t)-1) { perror("waitpid()"); return 1; } if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus)) { fprintf(stderr, "child failed %i\n", WEXITSTATUS(wstatus)); return 1; } } for (i = 0; i < 3; ++i) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } if ((cqe->user_data == 1 && cqe->res != -ECANCELED) || (cqe->user_data == 2 && cqe->res != -ECANCELED) || (cqe->user_data == 3 && cqe->res != -ETIME)) { fprintf(stderr, "%i %i\n", (int)cqe->user_data, cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); return 0; } static int test_sqpoll_cancel_iowq_requests(void) { struct io_uring ring; struct io_uring_sqe *sqe; int ret, fds[2]; char buffer[16]; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SQPOLL); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } if (pipe(fds)) { perror("pipe"); return 1; } /* pin both pipe ends via io-wq */ sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fds[0], buffer, 10, 0); sqe->flags |= IOSQE_ASYNC | IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); io_uring_prep_write(sqe, fds[1], buffer, 10, 0); sqe->flags |= IOSQE_ASYNC; sqe->user_data = 2; ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } /* wait for sqpoll to kick in and submit before exit */ sleep(1); io_uring_queue_exit(&ring); /* close the write end, so if ring is 
cancelled properly read() fails*/ close(fds[1]); ret = read(fds[0], buffer, 10); close(fds[0]); return 0; } int main(int argc, char *argv[]) { const char *fname = ".io-cancel-test"; int i, ret; if (argc > 1) return T_EXIT_SKIP; if (test_dont_cancel_another_ring()) { fprintf(stderr, "test_dont_cancel_another_ring() failed\n"); return T_EXIT_FAIL; } if (test_cancel_req_across_fork()) { fprintf(stderr, "test_cancel_req_across_fork() failed\n"); return T_EXIT_FAIL; } if (test_cancel_inflight_exit()) { fprintf(stderr, "test_cancel_inflight_exit() failed\n"); return T_EXIT_FAIL; } if (test_sqpoll_cancel_iowq_requests()) { fprintf(stderr, "test_sqpoll_cancel_iowq_requests() failed\n"); return T_EXIT_FAIL; } t_create_file(fname, FILE_SIZE); vecs = t_create_buffers(BUFFERS, BS); for (i = 0; i < 8; i++) { int write = (i & 1) != 0; int partial = (i & 2) != 0; int async = (i & 4) != 0; ret = test_io_cancel(fname, write, partial, async); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_io_cancel %d %d %d failed\n", write, partial, async); goto err; } } unlink(fname); return T_EXIT_PASS; err: unlink(fname); return T_EXIT_FAIL; } liburing-2.6/test/io_uring_enter.c000066400000000000000000000136471461424365000173470ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * io_uring_enter.c * * Description: Unit tests for the io_uring_enter system call. * * Copyright 2019, Red Hat, Inc. * Author: Jeff Moyer */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #include "liburing/barrier.h" #include "../src/syscall.h" #define IORING_MAX_ENTRIES 4096 #define IORING_MAX_ENTRIES_FALLBACK 128 static int expect_fail(int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig, int error) { int ret; ret = io_uring_enter(fd, to_submit, min_complete, flags, sig); if (ret >= 0) { fprintf(stderr, "expected %s, but call succeeded\n", strerror(-error)); return 1; } if (ret != error) { fprintf(stderr, "expected %d, got %d\n", error, ret); return 1; } return 0; } static int try_io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags, sigset_t *sig, int expect) { int ret; if (expect < 0) return expect_fail(fd, to_submit, min_complete, flags, sig, expect); ret = io_uring_enter(fd, to_submit, min_complete, flags, sig); if (ret != expect) { fprintf(stderr, "Expected %d, got %d\n", expect, ret); return 1; } return 0; } /* * prep a read I/O. index is treated like a block number. 
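 * Every I/O in this test is one 4096-byte block: a read prepared for
 * index i covers bytes [i * 4096, (i + 1) * 4096) of the test file.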
*/ static int setup_file(char *template, off_t len) { int fd, ret; char buf[4096]; fd = mkstemp(template); if (fd < 0) { perror("mkstemp"); exit(1); } ret = ftruncate(fd, len); if (ret < 0) { perror("ftruncate"); exit(1); } ret = read(fd, buf, 4096); if (ret != 4096) { fprintf(stderr, "read returned %d, expected 4096\n", ret); exit(1); } return fd; } static void io_prep_read(struct io_uring_sqe *sqe, int fd, off_t offset, size_t len) { struct iovec *iov; iov = t_malloc(sizeof(*iov)); assert(iov); iov->iov_base = t_malloc(len); assert(iov->iov_base); iov->iov_len = len; io_uring_prep_readv(sqe, fd, iov, 1, offset); io_uring_sqe_set_data(sqe, iov); // free on completion } static void reap_events(struct io_uring *ring, unsigned nr) { int ret; unsigned left = nr; struct io_uring_cqe *cqe; struct iovec *iov; struct timeval start, now, elapsed; gettimeofday(&start, NULL); while (left) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "io_uring_wait_cqe returned %d\n", ret); exit(1); } if (cqe->res != 4096) fprintf(stderr, "cqe->res: %d, expected 4096\n", cqe->res); iov = io_uring_cqe_get_data(cqe); free(iov->iov_base); free(iov); left--; io_uring_cqe_seen(ring, cqe); gettimeofday(&now, NULL); timersub(&now, &start, &elapsed); if (elapsed.tv_sec > 10) { fprintf(stderr, "Timed out waiting for I/Os to complete.\n"); fprintf(stderr, "%u expected, %u completed\n", nr, left); break; } } } static void submit_io(struct io_uring *ring, unsigned nr) { int fd, ret; off_t file_len; unsigned i; static char template[32] = "/tmp/io_uring_enter-test.XXXXXX"; struct io_uring_sqe *sqe; file_len = nr * 4096; fd = setup_file(template, file_len); for (i = 0; i < nr; i++) { /* allocate an sqe */ sqe = io_uring_get_sqe(ring); /* fill it in */ io_prep_read(sqe, fd, i * 4096, 4096); } /* submit the I/Os */ ret = io_uring_submit(ring); unlink(template); if (ret < 0) { perror("io_uring_enter"); exit(1); } } int main(int argc, char **argv) { int ret; unsigned int status = 0; struct io_uring ring; struct io_uring_sq *sq = &ring.sq; unsigned ktail, mask, index; unsigned sq_entries; unsigned completed, dropped; struct io_uring_params p; if (argc > 1) return T_EXIT_SKIP; memset(&p, 0, sizeof(p)); ret = t_io_uring_init_sqarray(IORING_MAX_ENTRIES, &ring, &p); if (ret == -ENOMEM) ret = t_io_uring_init_sqarray(IORING_MAX_ENTRIES_FALLBACK, &ring, &p); if (ret < 0) { perror("io_uring_queue_init"); exit(T_EXIT_FAIL); } mask = sq->ring_mask; /* invalid flags */ status |= try_io_uring_enter(ring.ring_fd, 1, 0, ~0U, NULL, -EINVAL); /* invalid fd, EBADF */ status |= try_io_uring_enter(-1, 0, 0, 0, NULL, -EBADF); /* valid, non-ring fd, EOPNOTSUPP */ status |= try_io_uring_enter(0, 0, 0, 0, NULL, -EOPNOTSUPP); /* to_submit: 0, flags: 0; should get back 0. */ status |= try_io_uring_enter(ring.ring_fd, 0, 0, 0, NULL, 0); /* fill the sq ring */ sq_entries = ring.sq.ring_entries; submit_io(&ring, sq_entries); ret = io_uring_enter(ring.ring_fd, 0, sq_entries, IORING_ENTER_GETEVENTS, NULL); if (ret < 0) { fprintf(stderr, "io_uring_enter: %s\n", strerror(-ret)); status = 1; } else { /* * This is a non-IOPOLL ring, which means that io_uring_enter * should not return until min_complete events are available * in the completion queue. */ completed = *ring.cq.ktail - *ring.cq.khead; if (completed != sq_entries) { fprintf(stderr, "Submitted %u I/Os, but only got %u completions\n", sq_entries, completed); status = 1; } reap_events(&ring, sq_entries); } /* * Add an invalid index to the submission queue. 
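 * (sq->array normally maps ring slots to SQE indexes; the entry is
 * forced to sq->ring_entries + 1, one past the largest valid index,
 * and the tail is then published with a release store.)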
This should * result in the dropped counter increasing. */ index = sq->ring_entries + 1; // invalid index dropped = *sq->kdropped; ktail = *sq->ktail; sq->array[ktail & mask] = index; ++ktail; /* * Ensure that the kernel sees the SQE update before it sees the tail * update. */ io_uring_smp_store_release(sq->ktail, ktail); ret = io_uring_enter(ring.ring_fd, 1, 0, 0, NULL); /* now check to see if our sqe was dropped */ if (*sq->kdropped == dropped) { fprintf(stderr, "dropped counter did not increase\n"); status = 1; } if (!status) return T_EXIT_PASS; fprintf(stderr, "FAIL\n"); return T_EXIT_FAIL; } liburing-2.6/test/io_uring_passthrough.c000066400000000000000000000236731461424365000206010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: basic read/write tests for io_uring passthrough commands */ #include #include #include #include #include #include "helpers.h" #include "liburing.h" #include "../src/syscall.h" #include "nvme.h" #define FILE_SIZE (256 * 1024) #define BS 8192 #define BUFFERS (FILE_SIZE / BS) static struct iovec *vecs; static int no_pt; /* * Each offset in the file has the ((test_case / 2) * FILE_SIZE) * + (offset / sizeof(int)) stored for every * sizeof(int) address. */ static int verify_buf(int tc, void *buf, off_t off) { int i, u_in_buf = BS / sizeof(unsigned int); unsigned int *ptr; off /= sizeof(unsigned int); off += (tc / 2) * FILE_SIZE; ptr = buf; for (i = 0; i < u_in_buf; i++) { if (off != *ptr) { fprintf(stderr, "Found %u, wanted %llu\n", *ptr, (unsigned long long) off); return 1; } ptr++; off++; } return 0; } static int fill_pattern(int tc) { unsigned int val, *ptr; int i, j; int u_in_buf = BS / sizeof(val); val = (tc / 2) * FILE_SIZE; for (i = 0; i < BUFFERS; i++) { ptr = vecs[i].iov_base; for (j = 0; j < u_in_buf; j++) { *ptr = val; val++; ptr++; } } return 0; } static int __test_io(const char *file, struct io_uring *ring, int tc, int read, int sqthread, int fixed, int nonvec) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct nvme_uring_cmd *cmd; int open_flags; int do_fixed; int i, ret, fd = -1; off_t offset; __u64 slba; __u32 nlb; if (read) open_flags = O_RDONLY; else open_flags = O_WRONLY; if (fixed) { ret = t_register_buffers(ring, vecs, BUFFERS); if (ret == T_SETUP_SKIP) return 0; if (ret != T_SETUP_OK) { fprintf(stderr, "buffer reg failed: %d\n", ret); goto err; } } fd = open(file, open_flags); if (fd < 0) { perror("file open"); goto err; } if (sqthread) { ret = io_uring_register_files(ring, &fd, 1); if (ret) { fprintf(stderr, "file reg failed: %d\n", ret); goto err; } } if (!read) fill_pattern(tc); offset = 0; for (i = 0; i < BUFFERS; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } if (read) { int use_fd = fd; do_fixed = fixed; if (sqthread) use_fd = 0; if (fixed && (i & 1)) do_fixed = 0; if (do_fixed) { io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base, vecs[i].iov_len, offset, i); sqe->cmd_op = NVME_URING_CMD_IO; } else if (nonvec) { io_uring_prep_read(sqe, use_fd, vecs[i].iov_base, vecs[i].iov_len, offset); sqe->cmd_op = NVME_URING_CMD_IO; } else { io_uring_prep_readv(sqe, use_fd, &vecs[i], 1, offset); sqe->cmd_op = NVME_URING_CMD_IO_VEC; } } else { int use_fd = fd; do_fixed = fixed; if (sqthread) use_fd = 0; if (fixed && (i & 1)) do_fixed = 0; if (do_fixed) { io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base, vecs[i].iov_len, offset, i); sqe->cmd_op = NVME_URING_CMD_IO; } else if (nonvec) { io_uring_prep_write(sqe, use_fd, vecs[i].iov_base, 
vecs[i].iov_len, offset); sqe->cmd_op = NVME_URING_CMD_IO; } else { io_uring_prep_writev(sqe, use_fd, &vecs[i], 1, offset); sqe->cmd_op = NVME_URING_CMD_IO_VEC; } } sqe->opcode = IORING_OP_URING_CMD; sqe->user_data = ((uint64_t)offset << 32) | i; if (sqthread) sqe->flags |= IOSQE_FIXED_FILE; cmd = (struct nvme_uring_cmd *)sqe->cmd; memset(cmd, 0, sizeof(struct nvme_uring_cmd)); cmd->opcode = read ? nvme_cmd_read : nvme_cmd_write; slba = offset >> lba_shift; nlb = (BS >> lba_shift) - 1; /* cdw10 and cdw11 represent starting lba */ cmd->cdw10 = slba & 0xffffffff; cmd->cdw11 = slba >> 32; /* cdw12 represent number of lba's for read/write */ cmd->cdw12 = nlb; if (do_fixed || nonvec) { cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base; cmd->data_len = vecs[i].iov_len; } else { cmd->addr = (__u64)(uintptr_t)&vecs[i]; cmd->data_len = 1; } cmd->nsid = nsid; offset += BS; } ret = io_uring_submit(ring); if (ret != BUFFERS) { fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); goto err; } for (i = 0; i < BUFFERS; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); goto err; } if (cqe->res != 0) { if (!no_pt) { no_pt = 1; goto skip; } fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); if (read) { int index = cqe->user_data & 0xffffffff; void *buf = vecs[index].iov_base; off_t voff = cqe->user_data >> 32; if (verify_buf(tc, buf, voff)) goto err; } } if (fixed) { ret = io_uring_unregister_buffers(ring); if (ret) { fprintf(stderr, "buffer unreg failed: %d\n", ret); goto err; } } if (sqthread) { ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "file unreg failed: %d\n", ret); goto err; } } skip: close(fd); return 0; err: if (fd != -1) close(fd); return 1; } static int test_io(const char *file, int tc, int read, int sqthread, int fixed, int nonvec) { struct io_uring ring; int ret, ring_flags = 0; ring_flags |= IORING_SETUP_SQE128; ring_flags |= IORING_SETUP_CQE32; if (sqthread) ring_flags |= IORING_SETUP_SQPOLL; ret = t_create_ring(64, &ring, ring_flags); if (ret == T_SETUP_SKIP) return 0; if (ret != T_SETUP_OK) { if (ret == -EINVAL) { no_pt = 1; return T_SETUP_SKIP; } fprintf(stderr, "ring create failed: %d\n", ret); return 1; } ret = __test_io(file, &ring, tc, read, sqthread, fixed, nonvec); io_uring_queue_exit(&ring); return ret; } /* * Send a passthrough command that nvme will fail during submission. * This comes handy for testing error handling. 
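 *
 * For reference, the per-command setup used by __test_io() above boils
 * down to this sketch (error handling omitted; buf/len stand in for the
 * iovec base and length, while lba_shift and nsid come from
 * nvme_get_info()):
 *
 *	sqe->opcode = IORING_OP_URING_CMD;
 *	sqe->cmd_op = NVME_URING_CMD_IO;
 *	cmd = (struct nvme_uring_cmd *)sqe->cmd;
 *	memset(cmd, 0, sizeof(*cmd));
 *	cmd->opcode = nvme_cmd_read;		-- or nvme_cmd_write
 *	slba = offset >> lba_shift;
 *	nlb = (BS >> lba_shift) - 1;		-- number of LBAs, 0-based
 *	cmd->cdw10 = slba & 0xffffffff;		-- starting LBA, low 32 bits
 *	cmd->cdw11 = slba >> 32;		-- starting LBA, high 32 bits
 *	cmd->cdw12 = nlb;
 *	cmd->addr = (__u64)(uintptr_t)buf;
 *	cmd->data_len = len;
 *	cmd->nsid = nsid;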
*/ static int test_invalid_passthru_submit(const char *file) { struct io_uring ring; int fd, ret, ring_flags, open_flags; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct nvme_uring_cmd *cmd; ring_flags = IORING_SETUP_CQE32 | IORING_SETUP_SQE128; ret = t_create_ring(1, &ring, ring_flags); if (ret != T_SETUP_OK) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } open_flags = O_RDONLY; fd = open(file, open_flags); if (fd < 0) { perror("file open"); goto err; } sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fd, vecs[0].iov_base, vecs[0].iov_len, 0); sqe->cmd_op = NVME_URING_CMD_IO; sqe->opcode = IORING_OP_URING_CMD; sqe->user_data = 1; cmd = (struct nvme_uring_cmd *)sqe->cmd; memset(cmd, 0, sizeof(struct nvme_uring_cmd)); cmd->opcode = nvme_cmd_read; cmd->addr = (__u64)(uintptr_t)&vecs[0].iov_base; cmd->data_len = vecs[0].iov_len; /* populate wrong nsid to force failure */ cmd->nsid = nsid + 1; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit got %d, wanted %d\n", ret, 1); goto err; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); goto err; } if (cqe->res == 0) { fprintf(stderr, "cqe res %d, wanted failure\n", cqe->res); goto err; } io_uring_cqe_seen(&ring, cqe); close(fd); io_uring_queue_exit(&ring); return 0; err: if (fd != -1) close(fd); io_uring_queue_exit(&ring); return 1; } /* * if we are polling io_uring_submit needs to always enter the * kernel to fetch events */ static int test_io_uring_submit_enters(const char *file) { struct io_uring ring; int fd, i, ret, ring_flags, open_flags; unsigned head; struct io_uring_cqe *cqe; struct nvme_uring_cmd *cmd; struct io_uring_sqe *sqe; ring_flags = IORING_SETUP_IOPOLL; ring_flags |= IORING_SETUP_SQE128; ring_flags |= IORING_SETUP_CQE32; ret = io_uring_queue_init(64, &ring, ring_flags); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } open_flags = O_WRONLY; fd = open(file, open_flags); if (fd < 0) { perror("file open"); goto err; } for (i = 0; i < BUFFERS; i++) { off_t offset = BS * (rand() % BUFFERS); __u64 slba; __u32 nlb; sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); sqe->user_data = i; sqe->opcode = IORING_OP_URING_CMD; sqe->cmd_op = NVME_URING_CMD_IO; cmd = (struct nvme_uring_cmd *)sqe->cmd; memset(cmd, 0, sizeof(struct nvme_uring_cmd)); slba = offset >> lba_shift; nlb = (BS >> lba_shift) - 1; cmd->opcode = nvme_cmd_read; cmd->cdw10 = slba & 0xffffffff; cmd->cdw11 = slba >> 32; cmd->cdw12 = nlb; cmd->addr = (__u64)(uintptr_t)&vecs[i]; cmd->data_len = 1; cmd->nsid = nsid; } /* submit manually to avoid adding IORING_ENTER_GETEVENTS */ ret = __sys_io_uring_enter(ring.ring_fd, __io_uring_flush_sq(&ring), 0, 0, NULL); if (ret < 0) goto err; for (i = 0; i < 500; i++) { ret = io_uring_submit(&ring); if (ret != 0) { fprintf(stderr, "still had %d sqes to submit\n", ret); goto err; } io_uring_for_each_cqe(&ring, head, cqe) { if (cqe->res == -EOPNOTSUPP) fprintf(stdout, "Device doesn't support polled IO\n"); goto ok; } usleep(10000); } err: ret = 1; if (fd != -1) close(fd); ok: io_uring_queue_exit(&ring); return ret; } int main(int argc, char *argv[]) { int i, ret; char *fname; if (argc < 2) return T_EXIT_SKIP; fname = argv[1]; ret = nvme_get_info(fname); if (ret) return T_EXIT_SKIP; vecs = t_create_buffers(BUFFERS, BS); for (i = 0; i < 16; i++) { int read = (i & 1) != 0; int sqthread = (i & 2) != 0; int fixed = (i & 4) != 0; int nonvec = (i & 8) != 0; ret = test_io(fname, i, read, sqthread, fixed, 
nonvec); if (no_pt) break; if (ret) { fprintf(stderr, "test_io failed %d/%d/%d/%d\n", read, sqthread, fixed, nonvec); goto err; } } if (no_pt) return T_EXIT_SKIP; ret = test_io_uring_submit_enters(fname); if (ret) { fprintf(stderr, "test_io_uring_submit_enters failed\n"); goto err; } ret = test_invalid_passthru_submit(fname); if (ret) { fprintf(stderr, "test_invalid_passthru_submit failed\n"); goto err; } return T_EXIT_PASS; err: return T_EXIT_FAIL; } liburing-2.6/test/io_uring_register.c000066400000000000000000000267441461424365000200600ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * io_uring_register.c * * Description: Unit tests for the io_uring_register system call. * * Copyright 2019, Red Hat, Inc. * Author: Jeff Moyer */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #include "../src/syscall.h" static int pagesize; static rlim_t mlock_limit; static int devnull; static int expect_fail(int fd, unsigned int opcode, void *arg, unsigned int nr_args, int error, int error2) { int ret; ret = io_uring_register(fd, opcode, arg, nr_args); if (ret >= 0) { int ret2 = 0; fprintf(stderr, "expected %s, but call succeeded\n", strerror(error)); if (opcode == IORING_REGISTER_BUFFERS) { ret2 = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0); } else if (opcode == IORING_REGISTER_FILES) { ret2 = io_uring_register(fd, IORING_UNREGISTER_FILES, 0, 0); } if (ret2) { fprintf(stderr, "internal error: failed to unregister\n"); exit(1); } return 1; } if (ret != error && (error2 && ret != error2)) { fprintf(stderr, "expected %d/%d, got %d\n", error, error2, ret); return 1; } return 0; } static int new_io_uring(int entries, struct io_uring_params *p) { int fd; fd = io_uring_setup(entries, p); if (fd < 0) { perror("io_uring_setup"); exit(1); } return fd; } #define MAXFDS (UINT_MAX * sizeof(int)) static void *map_filebacked(size_t size) { int fd, ret; void *addr; char template[32] = "io_uring_register-test-XXXXXXXX"; fd = mkstemp(template); if (fd < 0) { perror("mkstemp"); return NULL; } unlink(template); ret = ftruncate(fd, size); if (ret < 0) { perror("ftruncate"); close(fd); return NULL; } addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { perror("mmap"); close(fd); return NULL; } close(fd); return addr; } /* * NOTE: this is now limited by SCM_MAX_FD (253). Keep the code for now, * but probably should augment it to test 253 and 254, specifically. */ static int test_max_fds(int uring_fd) { int status = 1; int ret; void *fd_as; /* file descriptor address space */ int fdtable_fd; /* fd for the file that will be mapped over and over */ int io_fd; /* the valid fd for I/O -- /dev/null */ int *fds; /* used to map the file into the address space */ char template[32] = "io_uring_register-test-XXXXXXXX"; unsigned long long i, nr_maps, nr_fds; /* * First, mmap anonymous the full size. That will guarantee the * mapping will fit in the memory area selected by mmap. Then, * over-write that mapping using a file-backed mapping, 128MiB at * a time using MAP_FIXED. 
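 *
 * In sketch form, with size = UINT_MAX * sizeof(int) (one overlay shown;
 * the loop below repeats it across the whole reservation):
 *
 *	fd_as = mmap(NULL, size, PROT_READ|PROT_WRITE,
 *		     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);	-- reserve range
 *	fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE,
 *		   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);	-- overlay file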
*/ fd_as = mmap(NULL, UINT_MAX * sizeof(int), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (fd_as == MAP_FAILED) { if (errno == ENOMEM) return 0; perror("mmap fd_as"); exit(1); } fdtable_fd = mkstemp(template); if (fdtable_fd < 0) { perror("mkstemp"); exit(1); } unlink(template); ret = ftruncate(fdtable_fd, 128*1024*1024); if (ret < 0) { perror("ftruncate"); exit(1); } io_fd = open("/dev/null", O_RDWR); if (io_fd < 0) { perror("open /dev/null"); exit(1); } fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, fdtable_fd, 0); if (fds == MAP_FAILED) { perror("mmap fdtable"); exit(1); } /* fill the fd table */ nr_fds = 128*1024*1024 / sizeof(int); for (i = 0; i < nr_fds; i++) fds[i] = io_fd; /* map the file through the rest of the address space */ nr_maps = (UINT_MAX * sizeof(int)) / (128*1024*1024); for (i = 0; i < nr_maps; i++) { fds = &fds[nr_fds]; /* advance fds by 128MiB */ fds = mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, fdtable_fd, 0); if (fds == MAP_FAILED) { fprintf(stderr, "mmap failed at offset %lu\n", (unsigned long)((char *)fd_as - (char *)fds)); exit(1); } } /* Now fd_as points to the file descriptor array. */ /* * We may not be able to map all of these files. Let's back off * until success. */ nr_fds = UINT_MAX; while (nr_fds) { ret = io_uring_register(uring_fd, IORING_REGISTER_FILES, fd_as, nr_fds); if (ret != 0) { nr_fds /= 2; continue; } status = 0; ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0); if (ret < 0) { errno = -ret; perror("io_uring_register UNREGISTER_FILES"); exit(1); } break; } close(io_fd); close(fdtable_fd); ret = munmap(fd_as, UINT_MAX * sizeof(int)); if (ret != 0) { fprintf(stderr, "munmap(%zu) failed\n", UINT_MAX * sizeof(int)); exit(1); } return status; } static int test_memlock_exceeded(int fd) { int ret; void *buf; struct iovec iov; /* if limit is larger than 2gb, just skip this test */ if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL) return 0; iov.iov_len = mlock_limit * 2; buf = t_malloc(iov.iov_len); iov.iov_base = buf; while (iov.iov_len) { ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1); if (ret == -ENOMEM) { iov.iov_len /= 2; continue; } else if (ret == -EFAULT) { free(buf); return 0; } else if (ret) { fprintf(stderr, "expected success or EFAULT, got %d\n", ret); free(buf); return 1; } ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0); if (ret != 0) { fprintf(stderr, "error: unregister failed with %d\n", ret); free(buf); return 1; } break; } if (!iov.iov_len) printf("Unable to register buffers. Check memlock rlimit.\n"); free(buf); return 0; } static int test_iovec_nr(int fd) { int i, ret, status = 0; unsigned int nr = 1000000; struct iovec *iovs; void *buf; iovs = malloc(nr * sizeof(struct iovec)); if (!iovs) { fprintf(stdout, "can't allocate iovecs, skip\n"); return 0; } buf = t_malloc(pagesize); for (i = 0; i < nr; i++) { iovs[i].iov_base = buf; iovs[i].iov_len = pagesize; } status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL, 0); /* reduce to UIO_MAXIOV */ nr = UIO_MAXIOV; ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr); if ((ret == -ENOMEM || ret == -EPERM) && geteuid()) { fprintf(stderr, "can't register large iovec for regular users, skip\n"); } else if (ret != 0) { fprintf(stderr, "expected success, got %d\n", ret); status = 1; } else { io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0); } free(buf); free(iovs); return status; } /* * io_uring limit is 1G. 
iov_len limit is ~0UL, I think */ static int test_iovec_size(int fd) { unsigned int status = 0; int ret; struct iovec iov; void *buf; /* NULL pointer for base */ iov.iov_base = 0; iov.iov_len = 4096; status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0); /* valid base, 0 length */ iov.iov_base = &buf; iov.iov_len = 0; status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0); /* valid base, length exceeds size */ /* this requires an unmapped page directly after buf */ buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); assert(buf != MAP_FAILED); ret = munmap(buf + pagesize, pagesize); assert(ret == 0); iov.iov_base = buf; iov.iov_len = 2 * pagesize; status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0); munmap(buf, pagesize); /* huge page */ buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS, -1, 0); if (buf == MAP_FAILED) { printf("Unable to map a huge page. Try increasing " "/proc/sys/vm/nr_hugepages by at least 1.\n"); printf("Skipping the hugepage test\n"); } else { /* * This should succeed, so long as RLIMIT_MEMLOCK is * not exceeded */ iov.iov_base = buf; iov.iov_len = 2*1024*1024; ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1); if (ret < 0) { if (ret == -ENOMEM) printf("Unable to test registering of a huge " "page. Try increasing the " "RLIMIT_MEMLOCK resource limit by at " "least 2MB."); else { fprintf(stderr, "expected success, got %d\n", ret); status = 1; } } else { ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0); if (ret < 0) { fprintf(stderr, "io_uring_unregister: %s\n", strerror(-ret)); status = 1; } } } ret = munmap(iov.iov_base, iov.iov_len); assert(ret == 0); /* file-backed buffers -- not supported */ buf = map_filebacked(2*1024*1024); if (!buf) status = 1; iov.iov_base = buf; iov.iov_len = 2*1024*1024; status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, -EOPNOTSUPP); munmap(buf, 2*1024*1024); /* bump up against the soft limit and make sure we get EFAULT * or whatever we're supposed to get. NOTE: this requires * running the test as non-root. */ if (getuid() != 0) status |= test_memlock_exceeded(fd); return status; } static int ioring_poll(struct io_uring *ring, int fd, int fixed) { int ret; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; sqe = io_uring_get_sqe(ring); memset(sqe, 0, sizeof(*sqe)); sqe->opcode = IORING_OP_POLL_ADD; if (fixed) sqe->flags = IOSQE_FIXED_FILE; sqe->fd = fd; sqe->poll_events = POLLIN|POLLOUT; ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "failed to submit poll sqe: %d.\n", ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "io_uring_wait_cqe failed with %d\n", ret); return 1; } ret = 0; if (!(cqe->res & POLLOUT)) { fprintf(stderr, "io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n", POLLOUT, cqe->res); ret = 1; } io_uring_cqe_seen(ring, cqe); return ret; } static int test_poll_ringfd(void) { int status = 0; int ret; int fd; struct io_uring ring; ret = io_uring_queue_init(1, &ring, 0); if (ret) { perror("io_uring_queue_init"); return 1; } fd = ring.ring_fd; /* try polling the ring fd */ status = ioring_poll(&ring, fd, 0); /* * now register the ring fd, and try the poll again. This should * fail, because the kernel does not allow registering of the * ring_fd.
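 * In other words, a sketch of the call that is expected to fail with
 * -EBADF:
 *
 *	int rfd = ring.ring_fd;
 *	ret = io_uring_register(rfd, IORING_REGISTER_FILES, &rfd, 1);
 *	-- ret == -EBADF here
 *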
*/ status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF, 0); /* tear down queue */ io_uring_queue_exit(&ring); return status; } int main(int argc, char **argv) { int fd, ret; unsigned int status = 0; struct io_uring_params p; struct rlimit rlim; if (argc > 1) return T_EXIT_SKIP; /* setup globals */ pagesize = getpagesize(); ret = getrlimit(RLIMIT_MEMLOCK, &rlim); if (ret < 0) { perror("getrlimit"); return T_EXIT_PASS; } mlock_limit = rlim.rlim_cur; devnull = open("/dev/null", O_RDWR); if (devnull < 0) { perror("open /dev/null"); exit(T_EXIT_FAIL); } /* invalid fd */ status |= expect_fail(-1, 0, NULL, 0, -EBADF, 0); /* valid fd that is not an io_uring fd */ status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP, 0); /* invalid opcode */ memset(&p, 0, sizeof(p)); fd = new_io_uring(1, &p); ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL, 0); if (ret) { /* if this succeeds, tear down the io_uring instance * and start clean for the next test. */ close(fd); fd = new_io_uring(1, &p); } /* IORING_REGISTER_BUFFERS */ status |= test_iovec_size(fd); status |= test_iovec_nr(fd); /* IORING_REGISTER_FILES */ status |= test_max_fds(fd); close(fd); /* uring poll on the uring fd */ status |= test_poll_ringfd(); if (status) fprintf(stderr, "FAIL\n"); return status; } liburing-2.6/test/io_uring_setup.c000066400000000000000000000051271461424365000173640ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * io_uring_setup.c * * Description: Unit tests for the io_uring_setup system call. * * Copyright 2019, Red Hat, Inc. * Author: Jeff Moyer */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" /* bogus: setup returns a valid fd on success... expect can't predict the fd we'll get, so this really only takes 1 parameter: error */ static int try_io_uring_setup(unsigned entries, struct io_uring_params *p, int expect) { int ret; ret = io_uring_setup(entries, p); if (ret != expect) { fprintf(stderr, "expected %d, got %d\n", expect, ret); /* if we got a valid uring, close it */ if (ret > 0) close(ret); return 1; } if (expect < 0 && expect != ret) { if (ret == -EPERM && geteuid() != 0) { printf("Needs root, not flagging as an error\n"); return 0; } fprintf(stderr, "expected errno %d, got %d\n", expect, ret); return 1; } return 0; } int main(int argc, char **argv) { int fd; unsigned int status = 0; struct io_uring_params p; if (argc > 1) return T_EXIT_SKIP; memset(&p, 0, sizeof(p)); status |= try_io_uring_setup(0, &p, -EINVAL); status |= try_io_uring_setup(1, NULL, -EFAULT); /* resv array is non-zero */ memset(&p, 0, sizeof(p)); p.resv[0] = p.resv[1] = p.resv[2] = 1; status |= try_io_uring_setup(1, &p, -EINVAL); /* invalid flags */ memset(&p, 0, sizeof(p)); p.flags = ~0U; status |= try_io_uring_setup(1, &p, -EINVAL); /* IORING_SETUP_SQ_AFF set but not IORING_SETUP_SQPOLL */ memset(&p, 0, sizeof(p)); p.flags = IORING_SETUP_SQ_AFF; status |= try_io_uring_setup(1, &p, -EINVAL); /* attempt to bind to invalid cpu */ memset(&p, 0, sizeof(p)); p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF; p.sq_thread_cpu = get_nprocs_conf(); status |= try_io_uring_setup(1, &p, -EINVAL); /* I think we can limit a process to a set of cpus. I assume * we shouldn't be able to setup a kernel thread outside of that. * try to do that. 
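 * A well-formed binding, for contrast, would look like this sketch
 * (assuming CPU 0 is in the task's allowed set):
 *
 *	memset(&p, 0, sizeof(p));
 *	p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF;
 *	p.sq_thread_cpu = 0;
 *	fd = io_uring_setup(1, &p);	-- may need privileges on older kernels
 *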
(task->cpus_allowed) */ /* read/write on io_uring_fd */ memset(&p, 0, sizeof(p)); fd = io_uring_setup(1, &p); if (fd < 0) { fprintf(stderr, "io_uring_setup failed with %d, expected success\n", -fd); status = 1; } else { char buf[4096]; int ret; ret = read(fd, buf, 4096); if (ret >= 0) { fprintf(stderr, "read from io_uring fd succeeded. expected fail\n"); status = 1; } } if (!status) return T_EXIT_PASS; fprintf(stderr, "FAIL\n"); return T_EXIT_FAIL; } liburing-2.6/test/iopoll-leak.c000066400000000000000000000027071461424365000165420ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test a mem leak with IOPOLL */ #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE (128 * 1024) #define BS 4096 #define BUFFERS (FILE_SIZE / BS) static int do_iopoll(const char *fname) { struct io_uring_sqe *sqe; struct io_uring ring; struct iovec *iov; int fd; fd = open(fname, O_RDONLY | O_DIRECT); if (fd < 0) { perror("open"); return T_EXIT_SKIP; } iov = t_create_buffers(1, 4096); t_create_ring(2, &ring, IORING_SETUP_IOPOLL); sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len, 0); io_uring_submit(&ring); close(fd); return T_EXIT_PASS; } static int test(const char *fname) { if (fork()) { int stat; wait(&stat); return WEXITSTATUS(stat); } else { int ret; ret = do_iopoll(fname); exit(ret); } } int main(int argc, char *argv[]) { char buf[256]; char *fname; int i, ret; if (argc > 1) { fname = argv[1]; } else { srand((unsigned)time(NULL)); snprintf(buf, sizeof(buf), ".iopoll-leak-%u-%u", (unsigned)rand(), (unsigned)getpid()); fname = buf; t_create_file(fname, FILE_SIZE); } for (i = 0; i < 16; i++) { ret = test(fname); if (ret == T_EXIT_SKIP || ret == T_EXIT_FAIL) break; } if (fname != argv[1]) unlink(fname); return ret; } liburing-2.6/test/iopoll-overflow.c000066400000000000000000000044021461424365000174630ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: IOPOLL with overflow test case */ #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #include "../src/syscall.h" #define FILE_SIZE (128 * 1024) #define BS 4096 #define BUFFERS (FILE_SIZE / BS) static struct iovec *vecs; static int test(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; int i, j, ret; loff_t off; off = FILE_SIZE - BS; for (j = 0; j < 8; j++) { for (i = 0; i < BUFFERS; i++) { sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, fd, vecs[i].iov_base, vecs[i].iov_len, off); if (!off) off = FILE_SIZE - BS; else off -= BS; } ret = io_uring_submit(ring); if (ret != BUFFERS) { fprintf(stderr, "submitted %d\n", ret); return T_EXIT_FAIL; } } sleep(1); ret = __sys_io_uring_enter(ring->ring_fd, 0, BUFFERS * 8, IORING_ENTER_GETEVENTS, NULL); for (i = 0; i < BUFFERS * 8; i++) { struct io_uring_cqe *cqe; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait=%d\n", ret); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); } return T_EXIT_PASS; } int main(int argc, char *argv[]) { struct io_uring_params p = { }; struct io_uring ring; char buf[256]; char *fname; int ret, fd; p.flags = IORING_SETUP_IOPOLL | IORING_SETUP_CQSIZE; p.cq_entries = 64; ret = t_create_ring_params(64, &ring, &p); if (ret == T_SETUP_SKIP) return 0; if (ret != T_SETUP_OK) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } if (argc > 1) { fname = argv[1]; } else { srand((unsigned)time(NULL)); 
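/*
 * Note on sizing: the ring above was created with IORING_SETUP_CQSIZE and
 * only 64 CQ entries, while test() queues up BUFFERS * 8 (256)
 * completions, so the CQ ring is guaranteed to overflow. On kernels with
 * IORING_FEAT_NODROP, overflowed CQEs are buffered by the kernel and
 * flushed once we re-enter it, which is what the explicit GETEVENTS call
 * in test() exercises, roughly:
 *
 *	__sys_io_uring_enter(ring->ring_fd, 0, BUFFERS * 8,
 *			     IORING_ENTER_GETEVENTS, NULL);
 */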
snprintf(buf, sizeof(buf), ".basic-rw-%u-%u", (unsigned)rand(), (unsigned)getpid()); fname = buf; t_create_file(fname, FILE_SIZE); } fd = open(fname, O_RDONLY | O_DIRECT); if (fd < 0) { if (errno == EINVAL) { if (fname != argv[1]) unlink(fname); return T_EXIT_SKIP; } perror("open"); goto err; } vecs = t_create_buffers(BUFFERS, BS); ret = test(&ring, fd); if (fname != argv[1]) unlink(fname); return ret; err: if (fname != argv[1]) unlink(fname); return T_EXIT_FAIL; } liburing-2.6/test/iopoll.c000066400000000000000000000224531461424365000156300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: basic read/write tests with polled IO */ #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #include "../src/syscall.h" #define FILE_SIZE (128 * 1024) #define BS 4096 #define BUFFERS (FILE_SIZE / BS) static struct iovec *vecs; static int no_buf_select; static int no_iopoll; static int provide_buffers(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, i; for (i = 0; i < BUFFERS; i++) { sqe = io_uring_get_sqe(ring); io_uring_prep_provide_buffers(sqe, vecs[i].iov_base, vecs[i].iov_len, 1, 1, i); } ret = io_uring_submit(ring); if (ret != BUFFERS) { fprintf(stderr, "submit: %d\n", ret); return 1; } for (i = 0; i < BUFFERS; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (cqe->res < 0) { fprintf(stderr, "cqe->res=%d\n", cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); } return 0; } static int __test_io(const char *file, struct io_uring *ring, int write, int sqthread, int fixed, int buf_select) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int open_flags; int i, fd = -1, ret; off_t offset; if (buf_select) { write = 0; fixed = 0; } if (buf_select && provide_buffers(ring)) return 1; if (write) open_flags = O_WRONLY; else open_flags = O_RDONLY; open_flags |= O_DIRECT; if (fixed) { ret = t_register_buffers(ring, vecs, BUFFERS); if (ret == T_SETUP_SKIP) return 0; if (ret != T_SETUP_OK) { fprintf(stderr, "buffer reg failed: %d\n", ret); goto err; } } fd = open(file, open_flags); if (fd < 0) { if (errno == EINVAL) return 0; perror("file open"); goto err; } if (sqthread) { ret = io_uring_register_files(ring, &fd, 1); if (ret) { fprintf(stderr, "file reg failed: %d\n", ret); goto err; } } offset = 0; for (i = 0; i < BUFFERS; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } offset = BS * (rand() % BUFFERS); if (write) { int do_fixed = fixed; int use_fd = fd; if (sqthread) use_fd = 0; if (fixed && (i & 1)) do_fixed = 0; if (do_fixed) { io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base, vecs[i].iov_len, offset, i); } else { io_uring_prep_writev(sqe, use_fd, &vecs[i], 1, offset); } } else { int do_fixed = fixed; int use_fd = fd; if (sqthread) use_fd = 0; if (fixed && (i & 1)) do_fixed = 0; if (do_fixed) { io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base, vecs[i].iov_len, offset, i); } else { io_uring_prep_readv(sqe, use_fd, &vecs[i], 1, offset); } } if (sqthread) sqe->flags |= IOSQE_FIXED_FILE; if (buf_select) { sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = buf_select; sqe->user_data = i; } } ret = io_uring_submit(ring); if (ret != BUFFERS) { ret = io_uring_peek_cqe(ring, &cqe); if (!ret && cqe->res == -EOPNOTSUPP) { no_iopoll = 1; io_uring_cqe_seen(ring, cqe); goto out; } fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); goto err; } for (i = 0; i < BUFFERS; i++) { ret = 
io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); goto err; } else if (cqe->res == -EOPNOTSUPP) { fprintf(stdout, "File/device/fs doesn't support polled IO\n"); no_iopoll = 1; goto out; } else if (cqe->res != BS) { fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS); goto err; } io_uring_cqe_seen(ring, cqe); } if (fixed) { ret = io_uring_unregister_buffers(ring); if (ret) { fprintf(stderr, "buffer unreg failed: %d\n", ret); goto err; } } if (sqthread) { ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "file unreg failed: %d\n", ret); goto err; } } out: close(fd); return 0; err: if (fd != -1) close(fd); return 1; } static void sig_alrm(int sig) { fprintf(stderr, "Ran out of time for peek test!\n"); exit(T_EXIT_FAIL); } /* * if we are polling, io_uring_cqe_peek() always needs to enter the kernel */ static int test_io_uring_cqe_peek(const char *file) { struct io_uring_cqe *cqe; struct io_uring ring; struct sigaction act; int fd, i, ret = T_EXIT_FAIL; if (no_iopoll) return 0; ret = io_uring_queue_init(64, &ring, IORING_SETUP_IOPOLL); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } fd = open(file, O_RDONLY | O_DIRECT); if (fd < 0) { if (errno == EINVAL) { io_uring_queue_exit(&ring); return T_EXIT_SKIP; } perror("file open"); goto err; } for (i = 0; i < BUFFERS; i++) { struct io_uring_sqe *sqe; off_t offset = BS * (rand() % BUFFERS); sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset); sqe->user_data = 1; } /* * Set alarm for 5 seconds, we should be done way before that */ memset(&act, 0, sizeof(act)); act.sa_handler = sig_alrm; sigaction(SIGALRM, &act, NULL); alarm(5); ret = io_uring_submit(&ring); if (ret != BUFFERS) { fprintf(stderr, "submit=%d\n", ret); goto err; } ret = T_EXIT_PASS; i = 0; do { ret = io_uring_peek_cqe(&ring, &cqe); if (ret) continue; io_uring_cqe_seen(&ring, cqe); i++; } while (i < BUFFERS); err: if (fd != -1) close(fd); io_uring_queue_exit(&ring); return ret; } /* * if we are polling io_uring_submit needs to always enter the * kernel to fetch events */ static int test_io_uring_submit_enters(const char *file) { struct io_uring ring; int fd, i, ret, ring_flags, open_flags; unsigned head; struct io_uring_cqe *cqe; if (no_iopoll) return 0; ring_flags = IORING_SETUP_IOPOLL; ret = io_uring_queue_init(64, &ring, ring_flags); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } open_flags = O_WRONLY | O_DIRECT; fd = open(file, open_flags); if (fd < 0) { if (errno == EINVAL) return T_EXIT_SKIP; perror("file open"); goto err; } for (i = 0; i < BUFFERS; i++) { struct io_uring_sqe *sqe; off_t offset = BS * (rand() % BUFFERS); sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, fd, &vecs[i], 1, offset); sqe->user_data = 1; } /* submit manually to avoid adding IORING_ENTER_GETEVENTS */ ret = __sys_io_uring_enter(ring.ring_fd, __io_uring_flush_sq(&ring), 0, 0, NULL); if (ret < 0) goto err; for (i = 0; i < 500; i++) { ret = io_uring_submit(&ring); if (ret != 0) { fprintf(stderr, "still had %d sqes to submit, this is unexpected", ret); goto err; } io_uring_for_each_cqe(&ring, head, cqe) { /* runs after test_io so should not have happened */ if (cqe->res == -EOPNOTSUPP) { fprintf(stdout, "File/device/fs doesn't support polled IO\n"); goto err; } goto ok; } usleep(10000); } err: ret = 1; if (fd != -1) close(fd); ok: io_uring_queue_exit(&ring); return ret; } static int test_io(const char *file, int write, int sqthread, int fixed, int buf_select, int defer) { 
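/*
 * When 'defer' is set, the ring is created with IORING_SETUP_SINGLE_ISSUER
 * and IORING_SETUP_DEFER_TASKRUN, i.e. completion work is only run when
 * the one submitting task explicitly waits for events. In sketch form,
 * the flag selection done below is:
 *
 *	ring_flags = IORING_SETUP_IOPOLL;
 *	if (defer)
 *		ring_flags |= IORING_SETUP_SINGLE_ISSUER |
 *			      IORING_SETUP_DEFER_TASKRUN;
 */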
struct io_uring ring; int ret, ring_flags = IORING_SETUP_IOPOLL; if (no_iopoll) return 0; if (defer) ring_flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN; ret = t_create_ring(64, &ring, ring_flags); if (ret == T_SETUP_SKIP) return 0; if (ret != T_SETUP_OK) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } ret = __test_io(file, &ring, write, sqthread, fixed, buf_select); io_uring_queue_exit(&ring); return ret; } static int probe_buf_select(void) { struct io_uring_probe *p; struct io_uring ring; int ret; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } p = io_uring_get_probe_ring(&ring); if (!p || !io_uring_opcode_supported(p, IORING_OP_PROVIDE_BUFFERS)) { no_buf_select = 1; fprintf(stdout, "Buffer select not supported, skipping\n"); return 0; } io_uring_free_probe(p); return 0; } int main(int argc, char *argv[]) { int i, ret, nr; char buf[256]; char *fname; if (probe_buf_select()) return T_EXIT_FAIL; if (argc > 1) { fname = argv[1]; } else { srand((unsigned)time(NULL)); snprintf(buf, sizeof(buf), ".basic-rw-%u-%u", (unsigned)rand(), (unsigned)getpid()); fname = buf; t_create_file(fname, FILE_SIZE); } vecs = t_create_buffers(BUFFERS, BS); nr = 32; if (no_buf_select) nr = 8; else if (!t_probe_defer_taskrun()) nr = 16; for (i = 0; i < nr; i++) { int write = (i & 1) != 0; int sqthread = (i & 2) != 0; int fixed = (i & 4) != 0; int buf_select = (i & 8) != 0; int defer = (i & 16) != 0; ret = test_io(fname, write, sqthread, fixed, buf_select, defer); if (ret) { fprintf(stderr, "test_io failed %d/%d/%d/%d/%d\n", write, sqthread, fixed, buf_select, defer); goto err; } if (no_iopoll) break; } ret = test_io_uring_submit_enters(fname); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_io_uring_submit_enters failed\n"); goto err; } /* * Keep this last, it exits on failure */ ret = test_io_uring_cqe_peek(fname); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_io_uring_cqe_peek failed\n"); goto err; } if (fname != argv[1]) unlink(fname); return T_EXIT_PASS; err: if (fname != argv[1]) unlink(fname); return T_EXIT_FAIL; } liburing-2.6/test/lfs-openat-write.c000066400000000000000000000047061461424365000175330ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static const int RSIZE = 2; static const int OPEN_FLAGS = O_RDWR | O_CREAT | O_LARGEFILE; static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR; #define DIE(...) 
\ do { \ fprintf(stderr, __VA_ARGS__); \ abort(); \ } while(0) static int do_write(struct io_uring *ring, int fd, off_t offset) { char buf[] = "some test write buf"; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int res, ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "failed to get sqe\n"); return 1; } io_uring_prep_write(sqe, fd, buf, sizeof(buf), offset); ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "failed to submit write: %s\n", strerror(-ret)); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret)); return 1; } res = cqe->res; io_uring_cqe_seen(ring, cqe); if (res < 0) { fprintf(stderr, "write failed: %s\n", strerror(-res)); return 1; } return 0; } static int test_open_write(struct io_uring *ring, int dfd, const char *fn) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, fd = -1; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "failed to get sqe\n"); return 1; } io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE); ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret)); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret)); return 1; } fd = cqe->res; io_uring_cqe_seen(ring, cqe); if (fd < 0) { fprintf(stderr, "openat failed: %s\n", strerror(-fd)); return 1; } return do_write(ring, fd, 1ULL << 32); } int main(int argc, char *argv[]) { struct io_uring ring; int dfd, ret; if (argc > 1) return T_EXIT_SKIP; dfd = open("/tmp", O_RDONLY | O_DIRECTORY); if (dfd < 0) DIE("open /tmp: %s\n", strerror(errno)); ret = io_uring_queue_init(RSIZE, &ring, 0); if (ret < 0) DIE("failed to init io_uring: %s\n", strerror(-ret)); ret = test_open_write(&ring, dfd, "io_uring_openat_write_test1"); io_uring_queue_exit(&ring); close(dfd); unlink("/tmp/io_uring_openat_write_test1"); return ret; } liburing-2.6/test/lfs-openat.c000066400000000000000000000131111461424365000163710ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include #include "liburing.h" #define DIE(...) \ do { \ fprintf(stderr, __VA_ARGS__); \ abort(); \ } while(0) static const int RSIZE = 2; static const int OPEN_FLAGS = O_RDWR | O_CREAT | O_LARGEFILE; static const mode_t OPEN_MODE = S_IRUSR | S_IWUSR; static int open_io_uring(struct io_uring *ring, int dfd, const char *fn) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, fd; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "failed to get sqe\n"); return 1; } io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE); ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret)); return 1; } ret = io_uring_wait_cqe(ring, &cqe); fd = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret < 0) { fprintf(stderr, "wait_cqe failed: %s\n", strerror(-ret)); return 1; } else if (fd < 0) { fprintf(stderr, "io_uring openat failed: %s\n", strerror(-fd)); return 1; } close(fd); return 0; } static int prepare_file(int dfd, const char* fn) { const char buf[] = "foo"; int fd, res; fd = openat(dfd, fn, OPEN_FLAGS, OPEN_MODE); if (fd < 0) { fprintf(stderr, "prepare/open: %s\n", strerror(errno)); return -1; } res = pwrite(fd, buf, sizeof(buf), 1ull << 32); if (res < 0) fprintf(stderr, "prepare/pwrite: %s\n", strerror(errno)); close(fd); return res < 0 ? 
res : 0; } static int test_linked_files(int dfd, const char *fn, bool async) { struct io_uring ring; struct io_uring_sqe *sqe; char buffer[128]; struct iovec iov = {.iov_base = buffer, .iov_len = sizeof(buffer), }; int ret, fd; int fds[2]; ret = io_uring_queue_init(10, &ring, 0); if (ret < 0) DIE("failed to init io_uring: %s\n", strerror(-ret)); if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(&ring); if (!sqe) { printf("get sqe failed\n"); return -1; } io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); sqe->flags |= IOSQE_IO_LINK; if (async) sqe->flags |= IOSQE_ASYNC; sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "failed to get sqe\n"); return 1; } io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE); ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret)); return 1; } fd = dup(ring.ring_fd); if (fd < 0) { fprintf(stderr, "dup() failed: %s\n", strerror(-fd)); return 1; } /* io_uring->flush() */ close(fd); io_uring_queue_exit(&ring); return 0; } static int test_drained_files(int dfd, const char *fn, bool linked, bool prepend) { struct io_uring ring; struct io_uring_sqe *sqe; char buffer[128]; struct iovec iov = {.iov_base = buffer, .iov_len = sizeof(buffer), }; int ret, fd, fds[2], to_cancel = 0; ret = io_uring_queue_init(10, &ring, 0); if (ret < 0) DIE("failed to init io_uring: %s\n", strerror(-ret)); if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(&ring); if (!sqe) { printf("get sqe failed\n"); return -1; } io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); sqe->user_data = 0; if (prepend) { sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "failed to get sqe\n"); return 1; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_DRAIN; to_cancel++; sqe->user_data = to_cancel; } if (linked) { sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "failed to get sqe\n"); return 1; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_DRAIN | IOSQE_IO_LINK; to_cancel++; sqe->user_data = to_cancel; } sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "failed to get sqe\n"); return 1; } io_uring_prep_openat(sqe, dfd, fn, OPEN_FLAGS, OPEN_MODE); sqe->flags |= IOSQE_IO_DRAIN; to_cancel++; sqe->user_data = to_cancel; ret = io_uring_submit(&ring); if (ret != 1 + to_cancel) { fprintf(stderr, "failed to submit openat: %s\n", strerror(-ret)); return 1; } fd = dup(ring.ring_fd); if (fd < 0) { fprintf(stderr, "dup() failed: %s\n", strerror(-fd)); return 1; } /* * close(), which triggers ->flush(), and io_uring_queue_exit() * should successfully return and not hang. 
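 *
 * The dup+close pattern that provokes ->flush() without dropping the
 * last reference to the ring is simply:
 *
 *	fd = dup(ring.ring_fd);
 *	close(fd);	-- ->flush() runs here, the ring stays alive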
*/ close(fd); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { const char *fn = "io_uring_openat_test"; struct io_uring ring; int ret, dfd; if (argc > 1) return 0; dfd = open("/tmp", O_PATH); if (dfd < 0) DIE("open /tmp: %s\n", strerror(errno)); ret = io_uring_queue_init(RSIZE, &ring, 0); if (ret < 0) DIE("failed to init io_uring: %s\n", strerror(-ret)); if (prepare_file(dfd, fn)) return 1; ret = open_io_uring(&ring, dfd, fn); if (ret) { fprintf(stderr, "open_io_uring() failed\n"); goto out; } ret = test_linked_files(dfd, fn, false); if (ret) { fprintf(stderr, "test_linked_files() !async failed\n"); goto out; } ret = test_linked_files(dfd, fn, true); if (ret) { fprintf(stderr, "test_linked_files() async failed\n"); goto out; } ret = test_drained_files(dfd, fn, false, false); if (ret) { fprintf(stderr, "test_drained_files() failed\n"); goto out; } ret = test_drained_files(dfd, fn, false, true); if (ret) { fprintf(stderr, "test_drained_files() middle failed\n"); goto out; } ret = test_drained_files(dfd, fn, true, false); if (ret) { fprintf(stderr, "test_drained_files() linked failed\n"); goto out; } out: io_uring_queue_exit(&ring); close(dfd); unlink("/tmp/io_uring_openat_test"); return ret; } liburing-2.6/test/link-timeout.c000066400000000000000000000515341461424365000167550ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various linked timeout cases * */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int test_fail_lone_link_timeouts(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_link_timeout(sqe, &ts, 0); ts.tv_sec = 1; ts.tv_nsec = 0; sqe->user_data = 1; sqe->flags |= IOSQE_IO_LINK; ret = io_uring_submit(ring); if (ret != 1) { printf("sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } if (cqe->user_data != 1) { fprintf(stderr, "invalid user data %d\n", cqe->res); goto err; } if (cqe->res != -EINVAL) { fprintf(stderr, "got %d, wanted -EINVAL\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); return 0; err: return 1; } static int test_fail_two_link_timeouts(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i, nr_wait; ts.tv_sec = 1; ts.tv_nsec = 0; /* * sqe_1: write destined to fail * use buf=NULL, to do that during the issuing stage */ sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_writev(sqe, 0, NULL, 1, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; /* sqe_2: valid linked timeout */ sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 2; sqe->flags |= IOSQE_IO_LINK; /* sqe_3: invalid linked timeout */ sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_link_timeout(sqe, &ts, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 3; /* sqe_4: invalid linked timeout */ sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_link_timeout(sqe, &ts, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 4; ret = io_uring_submit(ring); if (ret < 3) { printf("sqe submit failed: %d\n", ret); goto err; } nr_wait = ret; for (i = 0; i < 
nr_wait; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: if (cqe->res != -EFAULT && cqe->res != -ECANCELED) { fprintf(stderr, "write got %d, wanted -EFAULT " "or -ECANCELED\n", cqe->res); goto err; } break; case 2: if (cqe->res != -ECANCELED) { fprintf(stderr, "Link timeout got %d, wanted -ECANCELED\n", cqe->res); goto err; } break; case 3: /* fall through */ case 4: if (cqe->res != -ECANCELED && cqe->res != -EINVAL) { fprintf(stderr, "Invalid link timeout got %d" ", wanted -ECANCELED || -EINVAL\n", cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test linked timeout with timeout (timeoutception) */ static int test_single_link_timeout_ception(struct io_uring *ring) { struct __kernel_timespec ts1, ts2; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts1.tv_sec = 1; ts1.tv_nsec = 0; io_uring_prep_timeout(sqe, &ts1, -1U, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts2.tv_sec = 2; ts2.tv_nsec = 0; io_uring_prep_link_timeout(sqe, &ts2, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret != 2) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: /* newer kernels allow timeout links */ if (cqe->res != -EINVAL && cqe->res != -ETIME) { fprintf(stderr, "Timeout got %d, wanted " "-EINVAL or -ETIME\n", cqe->res); goto err; } break; case 2: if (cqe->res != -ECANCELED) { fprintf(stderr, "Link timeout got %d, wanted -ECANCELED\n", cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test linked timeout with NOP */ static int test_single_link_timeout_nop(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts.tv_sec = 1; ts.tv_nsec = 0; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret != 2) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: if (cqe->res) { fprintf(stderr, "NOP got %d, wanted 0\n", cqe->res); goto err; } break; case 2: if (cqe->res != -ECANCELED) { fprintf(stderr, "Link timeout got %d, wanted -ECANCELED\n", cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test read that will not complete, with a linked timeout behind it that * has errors in the SQE */ static int test_single_link_timeout_error(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fds[2], ret, i; struct iovec iov; char buffer[128]; if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } iov.iov_base = buffer; iov.iov_len = sizeof(buffer); io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); sqe->flags |= IOSQE_IO_LINK;
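/*
 * This is the canonical linked-timeout pairing: the I/O SQE is marked
 * IOSQE_IO_LINK and the very next SQE is the link timeout, roughly:
 *
 *	io_uring_prep_readv(sqe, fds[0], &iov, 1, 0);
 *	sqe->flags |= IOSQE_IO_LINK;
 *	sqe = io_uring_get_sqe(ring);
 *	io_uring_prep_link_timeout(sqe, &ts, 0);
 *
 * Below, the timeout SQE is deliberately corrupted (a nonzero
 * sqe->ioprio is invalid for a link timeout), so it completes with
 * -EINVAL and the linked read gets -ECANCELED.
 */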
sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts.tv_sec = 1; ts.tv_nsec = 0; io_uring_prep_link_timeout(sqe, &ts, 0); /* set invalid field, it'll get failed */ sqe->ioprio = 89; sqe->user_data = 2; ret = io_uring_submit(ring); if (ret != 2) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: if (cqe->res != -ECANCELED) { fprintf(stderr, "Read got %d, wanted -ECANCELED\n", cqe->res); goto err; } break; case 2: if (cqe->res != -EINVAL) { fprintf(stderr, "Link timeout got %d, wanted -EINVAL\n", cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test read that will complete, with a linked timeout behind it */ static int test_single_link_no_timeout(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fds[2], ret, i; struct iovec iov; char buffer[128]; if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } iov.iov_base = buffer; iov.iov_len = sizeof(buffer); io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts.tv_sec = 1; ts.tv_nsec = 0; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 2; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } iov.iov_base = buffer; iov.iov_len = sizeof(buffer); io_uring_prep_writev(sqe, fds[1], &iov, 1, 0); sqe->user_data = 3; ret = io_uring_submit(ring); if (ret != 3) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: case 3: if (cqe->res != sizeof(buffer)) { fprintf(stderr, "R/W got %d, wanted %d\n", cqe->res, (int) sizeof(buffer)); goto err; } break; case 2: if (cqe->res != -ECANCELED) { fprintf(stderr, "Link timeout %d, wanted -ECANCELED\n", cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test read that will not complete, with a linked timeout behind it */ static int test_single_link_timeout(struct io_uring *ring, unsigned nsec) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fds[2], ret, i; struct iovec iov; char buffer[128]; if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } iov.iov_base = buffer; iov.iov_len = sizeof(buffer); io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts.tv_sec = 0; ts.tv_nsec = nsec; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret != 2) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: if (cqe->res != -EINTR && cqe->res != -ECANCELED) { fprintf(stderr, "Read got %d\n", cqe->res); goto err; } break; case 2: if (cqe->res != -EALREADY && cqe->res != -ETIME && cqe->res != 0) { fprintf(stderr, "Link timeout got %d\n", cqe->res); goto 
err; } break; } io_uring_cqe_seen(ring, cqe); } close(fds[0]); close(fds[1]); return 0; err: return 1; } static int test_timeout_link_chain1(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fds[2], ret, i; struct iovec iov; char buffer[128]; if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } iov.iov_base = buffer; iov.iov_len = sizeof(buffer); io_uring_prep_readv(sqe, fds[0], &iov, 1, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts.tv_sec = 0; ts.tv_nsec = 1000000; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 2; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->user_data = 3; ret = io_uring_submit(ring); if (ret != 3) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: if (cqe->res != -EINTR && cqe->res != -ECANCELED) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; case 2: /* FASTPOLL kernels can cancel successfully */ if (cqe->res != -EALREADY && cqe->res != -ETIME) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; case 3: if (cqe->res != -ECANCELED) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } close(fds[0]); close(fds[1]); return 0; err: return 1; } static int test_timeout_link_chain2(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fds[2], ret, i; if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_poll_add(sqe, fds[0], POLLIN); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts.tv_sec = 0; ts.tv_nsec = 1000000; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 2; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 3; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->user_data = 4; ret = io_uring_submit(ring); if (ret != 4) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 4; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { /* poll cancel really should return -ECANCEL... 
*/ case 1: if (cqe->res != -ECANCELED) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; case 2: if (cqe->res != -ETIME) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; case 3: case 4: if (cqe->res != -ECANCELED) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } close(fds[0]); close(fds[1]); return 0; err: return 1; } static int test_timeout_link_chain3(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fds[2], ret, i; if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_poll_add(sqe, fds[0], POLLIN); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts.tv_sec = 0; ts.tv_nsec = 1000000; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 2; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 3; /* POLL -> TIMEOUT -> NOP */ sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_poll_add(sqe, fds[0], POLLIN); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 4; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts.tv_sec = 0; ts.tv_nsec = 1000000; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 5; /* poll on pipe + timeout */ sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->user_data = 6; /* nop */ ret = io_uring_submit(ring); if (ret != 6) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 6; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 2: if (cqe->res != -ETIME) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; case 1: case 3: case 4: case 5: if (cqe->res != -ECANCELED) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; case 6: if (cqe->res) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } close(fds[0]); close(fds[1]); return 0; err: return 1; } static int test_timeout_link_chain4(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fds[2], ret, i; if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_poll_add(sqe, fds[0], POLLIN); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 2; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts.tv_sec = 0; ts.tv_nsec = 1000000; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 3; ret = io_uring_submit(ring); if (ret != 3) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { /* poll 
cancel really should return -ECANCEL... */ case 1: if (cqe->res) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; case 2: if (cqe->res != -ECANCELED) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; case 3: if (cqe->res != -ETIME) { fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data, cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } close(fds[0]); close(fds[1]); return 0; err: return 1; } static int test_timeout_link_chain5(struct io_uring *ring) { struct __kernel_timespec ts1, ts2; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts1.tv_sec = 1; ts1.tv_nsec = 0; io_uring_prep_link_timeout(sqe, &ts1, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 2; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } ts2.tv_sec = 2; ts2.tv_nsec = 0; io_uring_prep_link_timeout(sqe, &ts2, 0); sqe->user_data = 3; ret = io_uring_submit(ring); if (ret != 3) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: case 2: if (cqe->res && cqe->res != -ECANCELED) { fprintf(stderr, "Request got %d, wanted -EINVAL " "or -ECANCELED\n", cqe->res); goto err; } break; case 3: if (cqe->res != -ECANCELED && cqe->res != -EINVAL) { fprintf(stderr, "Link timeout got %d, wanted -ECANCELED\n", cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, 0); if (ret) { printf("ring setup failed\n"); return T_EXIT_FAIL; } ret = test_timeout_link_chain1(&ring); if (ret) { printf("test_single_link_chain1 failed\n"); return ret; } ret = test_timeout_link_chain2(&ring); if (ret) { printf("test_single_link_chain2 failed\n"); return ret; } ret = test_timeout_link_chain3(&ring); if (ret) { printf("test_single_link_chain3 failed\n"); return ret; } ret = test_timeout_link_chain4(&ring); if (ret) { printf("test_single_link_chain4 failed\n"); return ret; } ret = test_timeout_link_chain5(&ring); if (ret) { printf("test_single_link_chain5 failed\n"); return ret; } ret = test_single_link_timeout(&ring, 10); if (ret) { printf("test_single_link_timeout 10 failed\n"); return ret; } ret = test_single_link_timeout(&ring, 100000ULL); if (ret) { printf("test_single_link_timeout 100000 failed\n"); return ret; } ret = test_single_link_timeout(&ring, 500000000ULL); if (ret) { printf("test_single_link_timeout 500000000 failed\n"); return ret; } ret = test_single_link_no_timeout(&ring); if (ret) { printf("test_single_link_no_timeout failed\n"); return ret; } ret = test_single_link_timeout_error(&ring); if (ret) { printf("test_single_link_timeout_error failed\n"); return ret; } ret = test_single_link_timeout_nop(&ring); if (ret) { printf("test_single_link_timeout_nop failed\n"); return ret; } ret = test_single_link_timeout_ception(&ring); if (ret) { printf("test_single_link_timeout_ception failed\n"); return ret; } ret = test_fail_lone_link_timeouts(&ring); if (ret) { printf("test_fail_lone_link_timeouts failed\n"); 
return ret; } ret = test_fail_two_link_timeouts(&ring); if (ret) { printf("test_fail_two_link_timeouts failed\n"); return ret; } return T_EXIT_PASS; } liburing-2.6/test/link.c000066400000000000000000000215011461424365000152600ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various linked sqe tests * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_hardlink; /* * Timer with single nop */ static int test_single_hardlink(struct io_uring *ring) { struct __kernel_timespec ts; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } ts.tv_sec = 0; ts.tv_nsec = 10000000ULL; io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->flags |= IOSQE_IO_LINK | IOSQE_IO_HARDLINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (!cqe) { fprintf(stderr, "failed to get cqe\n"); goto err; } if (no_hardlink) goto next; if (cqe->user_data == 1 && cqe->res == -EINVAL) { fprintf(stdout, "Hard links not supported, skipping\n"); no_hardlink = 1; goto next; } if (cqe->user_data == 1 && cqe->res != -ETIME) { fprintf(stderr, "timeout failed with %d\n", cqe->res); goto err; } if (cqe->user_data == 2 && cqe->res) { fprintf(stderr, "nop failed with %d\n", cqe->res); goto err; } next: io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Timer -> timer -> nop */ static int test_double_hardlink(struct io_uring *ring) { struct __kernel_timespec ts1, ts2; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; if (no_hardlink) return 0; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } ts1.tv_sec = 0; ts1.tv_nsec = 10000000ULL; io_uring_prep_timeout(sqe, &ts1, 0, 0); sqe->flags |= IOSQE_IO_LINK | IOSQE_IO_HARDLINK; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } ts2.tv_sec = 0; ts2.tv_nsec = 15000000ULL; io_uring_prep_timeout(sqe, &ts2, 0, 0); sqe->flags |= IOSQE_IO_LINK | IOSQE_IO_HARDLINK; sqe->user_data = 2; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->user_data = 3; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (!cqe) { fprintf(stderr, "failed to get cqe\n"); goto err; } if (cqe->user_data == 1 && cqe->res != -ETIME) { fprintf(stderr, "timeout failed with %d\n", cqe->res); goto err; } if (cqe->user_data == 2 && cqe->res != -ETIME) { fprintf(stderr, "timeout failed with %d\n", cqe->res); goto err; } if (cqe->user_data == 3 && cqe->res) { fprintf(stderr, "nop failed with %d\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test failing head of chain, and dependent getting -ECANCELED */ static int test_single_link_fail(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe 
failed\n"); goto err; } io_uring_prep_remove_buffers(sqe, 10, 1); sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); ret = io_uring_submit(ring); if (ret <= 0) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_peek_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } if (!cqe) { printf("failed to get cqe\n"); goto err; } if (i == 0 && cqe->res != -ENOENT) { printf("sqe0 failed with %d, wanted -ENOENT\n", cqe->res); goto err; } if (i == 1 && cqe->res != -ECANCELED) { printf("sqe1 failed with %d, wanted -ECANCELED\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test two independent chains */ static int test_double_chain(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); ret = io_uring_submit(ring); if (ret <= 0) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 4; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test multiple dependents */ static int test_double_link(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); ret = io_uring_submit(ring); if (ret <= 0) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test single dependency */ static int test_single_link(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); ret = io_uring_submit(ring); if (ret <= 0) { printf("sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } static int test_early_fail_and_wait(void) { struct io_uring ring; struct io_uring_sqe *sqe; int ret, invalid_fd = 42; struct iovec iov = { .iov_base = NULL, .iov_len = 0 }; /* create a new ring as it leaves it dirty */ ret = io_uring_queue_init(8, &ring, 0); if (ret) { printf("ring setup failed\n"); return 1; } sqe = io_uring_get_sqe(&ring); if (!sqe) { printf("get sqe failed\n"); goto err; 
} io_uring_prep_readv(sqe, invalid_fd, &iov, 1, 0); sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(&ring); if (!sqe) { printf("get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); ret = io_uring_submit_and_wait(&ring, 2); if (ret <= 0 && ret != -EAGAIN) { printf("sqe submit failed: %d\n", ret); goto err; } io_uring_queue_exit(&ring); return 0; err: io_uring_queue_exit(&ring); return 1; } int main(int argc, char *argv[]) { struct io_uring ring, poll_ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, 0); if (ret) { printf("ring setup failed\n"); return T_EXIT_FAIL; } ret = io_uring_queue_init(8, &poll_ring, IORING_SETUP_IOPOLL); if (ret) { printf("poll_ring setup failed\n"); return T_EXIT_FAIL; } ret = test_single_link(&ring); if (ret) { printf("test_single_link failed\n"); return ret; } ret = test_double_link(&ring); if (ret) { printf("test_double_link failed\n"); return ret; } ret = test_double_chain(&ring); if (ret) { printf("test_double_chain failed\n"); return ret; } ret = test_single_link_fail(&poll_ring); if (ret) { printf("test_single_link_fail failed\n"); return ret; } ret = test_single_hardlink(&ring); if (ret) { fprintf(stderr, "test_single_hardlink\n"); return ret; } ret = test_double_hardlink(&ring); if (ret) { fprintf(stderr, "test_double_hardlink\n"); return ret; } ret = test_early_fail_and_wait(); if (ret) { fprintf(stderr, "test_early_fail_and_wait\n"); return ret; } return T_EXIT_PASS; } liburing-2.6/test/link_drain.c000066400000000000000000000112151461424365000164360ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring link io with drain io * */ #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static int test_link_drain_one(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe[5]; struct iovec iovecs; int i, fd, ret; off_t off = 0; char data[5] = {0}; char expect[5] = {0, 1, 2, 3, 4}; fd = open("testfile", O_WRONLY | O_CREAT, 0644); if (fd < 0) { perror("open"); return 1; } iovecs.iov_base = t_malloc(4096); iovecs.iov_len = 4096; for (i = 0; i < 5; i++) { sqe[i] = io_uring_get_sqe(ring); if (!sqe[i]) { printf("get sqe failed\n"); goto err; } } /* normal heavy io */ io_uring_prep_writev(sqe[0], fd, &iovecs, 1, off); sqe[0]->user_data = 0; /* link io */ io_uring_prep_nop(sqe[1]); sqe[1]->flags |= IOSQE_IO_LINK; sqe[1]->user_data = 1; /* link drain io */ io_uring_prep_nop(sqe[2]); sqe[2]->flags |= (IOSQE_IO_LINK | IOSQE_IO_DRAIN); sqe[2]->user_data = 2; /* link io */ io_uring_prep_nop(sqe[3]); sqe[3]->user_data = 3; /* normal nop io */ io_uring_prep_nop(sqe[4]); sqe[4]->user_data = 4; ret = io_uring_submit(ring); if (ret < 0) { printf("sqe submit failed\n"); goto err; } else if (ret < 5) { printf("Submitted only %d\n", ret); goto err; } for (i = 0; i < 5; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("child: wait completion %d\n", ret); goto err; } data[i] = cqe->user_data; io_uring_cqe_seen(ring, cqe); } if (memcmp(data, expect, 5) != 0) goto err; free(iovecs.iov_base); close(fd); unlink("testfile"); return 0; err: free(iovecs.iov_base); close(fd); unlink("testfile"); return 1; } static int test_link_drain_multi(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe[9]; struct iovec iovecs; int i, fd, ret; off_t off = 0; char data[9] = {0}; char expect[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; fd = open("testfile", O_WRONLY | O_CREAT, 0644); if (fd < 0) { perror("open"); return 1; } 
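/* unlink up front: the already-open fd keeps the file alive until close(),
 * so the writev SQEs below still have a valid target */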
unlink("testfile"); iovecs.iov_base = t_malloc(4096); iovecs.iov_len = 4096; for (i = 0; i < 9; i++) { sqe[i] = io_uring_get_sqe(ring); if (!sqe[i]) { printf("get sqe failed\n"); goto err; } } /* normal heavy io */ io_uring_prep_writev(sqe[0], fd, &iovecs, 1, off); sqe[0]->user_data = 0; /* link1 io head */ io_uring_prep_nop(sqe[1]); sqe[1]->flags |= IOSQE_IO_LINK; sqe[1]->user_data = 1; /* link1 drain io */ io_uring_prep_nop(sqe[2]); sqe[2]->flags |= (IOSQE_IO_LINK | IOSQE_IO_DRAIN); sqe[2]->user_data = 2; /* link1 io end*/ io_uring_prep_nop(sqe[3]); sqe[3]->user_data = 3; /* link2 io head */ io_uring_prep_nop(sqe[4]); sqe[4]->flags |= IOSQE_IO_LINK; sqe[4]->user_data = 4; /* link2 io */ io_uring_prep_nop(sqe[5]); sqe[5]->flags |= IOSQE_IO_LINK; sqe[5]->user_data = 5; /* link2 drain io */ io_uring_prep_writev(sqe[6], fd, &iovecs, 1, off); sqe[6]->flags |= (IOSQE_IO_LINK | IOSQE_IO_DRAIN); sqe[6]->user_data = 6; /* link2 io end */ io_uring_prep_nop(sqe[7]); sqe[7]->user_data = 7; /* normal io */ io_uring_prep_nop(sqe[8]); sqe[8]->user_data = 8; ret = io_uring_submit(ring); if (ret < 0) { printf("sqe submit failed\n"); goto err; } else if (ret < 9) { printf("Submitted only %d\n", ret); goto err; } for (i = 0; i < 9; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("child: wait completion %d\n", ret); goto err; } data[i] = cqe->user_data; io_uring_cqe_seen(ring, cqe); } if (memcmp(data, expect, 9) != 0) goto err; free(iovecs.iov_base); close(fd); return 0; err: free(iovecs.iov_base); close(fd); return 1; } static int test_drain(bool defer) { struct io_uring ring; int i, ret; unsigned int flags = 0; if (defer) flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN; ret = io_uring_queue_init(100, &ring, flags); if (ret) { printf("ring setup failed\n"); return 1; } for (i = 0; i < 1000; i++) { ret = test_link_drain_one(&ring); if (ret) { fprintf(stderr, "test_link_drain_one failed\n"); break; } ret = test_link_drain_multi(&ring); if (ret) { fprintf(stderr, "test_link_drain_multi failed\n"); break; } } return ret; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test_drain(false); if (ret) { fprintf(stderr, "test_drain(false) failed\n"); return T_EXIT_FAIL; } if (t_probe_defer_taskrun()) { ret = test_drain(true); if (ret) { fprintf(stderr, "test_drain(true) failed\n"); return T_EXIT_FAIL; } } return T_EXIT_PASS; } liburing-2.6/test/madvise.c000066400000000000000000000072131461424365000157570ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: basic madvise test */ #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE (128 * 1024) #define LOOPS 100 #define MIN_LOOPS 10 static unsigned long long utime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000000; return sec + usec; } static unsigned long long utime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return utime_since(tv, &end); } static int do_madvise(struct io_uring *ring, void *addr, off_t len, int advice) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "failed to get sqe\n"); return 1; } io_uring_prep_madvise(sqe, addr, len, advice); sqe->user_data = advice; ret = io_uring_submit_and_wait(ring, 1); if (ret 
!= 1) { fprintf(stderr, "submit: %d\n", ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return 1; } ret = cqe->res; if (ret == -EINVAL || ret == -EBADF) { fprintf(stdout, "Madvise not supported, skipping\n"); unlink(".madvise.tmp"); exit(0); } else if (ret) { fprintf(stderr, "cqe->res=%d\n", cqe->res); } io_uring_cqe_seen(ring, cqe); return ret; } static long do_copy(int fd, char *buf, void *ptr) { struct timeval tv; gettimeofday(&tv, NULL); memcpy(buf, ptr, FILE_SIZE); return utime_since_now(&tv); } static int test_madvise(struct io_uring *ring, const char *filename) { unsigned long cached_read, uncached_read, cached_read2; int fd, ret; char *buf; void *ptr; fd = open(filename, O_RDONLY); if (fd < 0) { perror("open"); return 1; } buf = t_malloc(FILE_SIZE); ptr = mmap(NULL, FILE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0); if (ptr == MAP_FAILED) { perror("mmap"); return 1; } cached_read = do_copy(fd, buf, ptr); if (cached_read == -1) return 1; cached_read = do_copy(fd, buf, ptr); if (cached_read == -1) return 1; ret = do_madvise(ring, ptr, FILE_SIZE, MADV_DONTNEED); if (ret) return 1; uncached_read = do_copy(fd, buf, ptr); if (uncached_read == -1) return 1; ret = do_madvise(ring, ptr, FILE_SIZE, MADV_DONTNEED); if (ret) return 1; ret = do_madvise(ring, ptr, FILE_SIZE, MADV_WILLNEED); if (ret) return 1; msync(ptr, FILE_SIZE, MS_SYNC); cached_read2 = do_copy(fd, buf, ptr); if (cached_read2 == -1) return 1; if (cached_read < uncached_read && cached_read2 < uncached_read) return 0; return 2; } int main(int argc, char *argv[]) { struct io_uring ring; int ret, i, good, bad; char *fname; if (argc > 1) { fname = argv[1]; } else { fname = ".madvise.tmp"; t_create_file(fname, FILE_SIZE); } if (io_uring_queue_init(8, &ring, 0)) { fprintf(stderr, "ring creation failed\n"); goto err; } good = bad = 0; for (i = 0; i < LOOPS; i++) { ret = test_madvise(&ring, fname); if (ret == 1) { fprintf(stderr, "test_madvise failed\n"); goto err; } else if (!ret) good++; else if (ret == 2) bad++; if (i >= MIN_LOOPS && !bad) break; } /* too hard to reliably test, just ignore */ if ((0) && bad > good) fprintf(stderr, "Suspicious timings (%u > %u)\n", bad, good); if (fname != argv[1]) unlink(fname); io_uring_queue_exit(&ring); return T_EXIT_PASS; err: if (fname != argv[1]) unlink(fname); return T_EXIT_FAIL; } liburing-2.6/test/mkdir.c000066400000000000000000000041201461424365000154270ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring mkdirat handling */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int do_mkdirat(struct io_uring *ring, const char *fn) { int ret; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } io_uring_prep_mkdirat(sqe, AT_FDCWD, fn, 0700); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqes(ring, &cqe, 1, 0, 0); if (ret) { fprintf(stderr, "wait_cqe failed: %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; err: return 1; } static int stat_file(const char *fn) { struct stat sb; if (!stat(fn, &sb)) return 0; return errno; } int main(int argc, char *argv[]) { static const char fn[] = "io_uring-mkdirat-test"; int ret; struct io_uring ring; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: 
%d\n", ret); return ret; } ret = do_mkdirat(&ring, fn); if (ret < 0) { if (ret == -EBADF || ret == -EINVAL) { fprintf(stdout, "mkdirat not supported, skipping\n"); goto skip; } fprintf(stderr, "mkdirat: %s\n", strerror(-ret)); goto err; } else if (ret) { goto err; } if (stat_file(fn)) { perror("stat"); goto err; } ret = do_mkdirat(&ring, fn); if (ret != -EEXIST) { fprintf(stderr, "do_mkdirat already exists failed: %d\n", ret); goto err1; } ret = do_mkdirat(&ring, "surely/this/wont/exist"); if (ret != -ENOENT) { fprintf(stderr, "do_mkdirat no parent failed: %d\n", ret); goto err1; } unlinkat(AT_FDCWD, fn, AT_REMOVEDIR); io_uring_queue_exit(&ring); return T_EXIT_PASS; skip: unlinkat(AT_FDCWD, fn, AT_REMOVEDIR); io_uring_queue_exit(&ring); return T_EXIT_SKIP; err1: unlinkat(AT_FDCWD, fn, AT_REMOVEDIR); err: io_uring_queue_exit(&ring); return T_EXIT_FAIL; } liburing-2.6/test/msg-ring-fd.c000066400000000000000000000144751461424365000164510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test fd passing with MSG_RING * */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_msg; static int no_sparse; struct data { pthread_t thread; pthread_barrier_t barrier; int ring_flags; int ring_fd; char buf[32]; }; static void *thread_fn(void *__data) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct data *d = __data; struct io_uring ring; int ret, fd = -1; io_uring_queue_init(8, &ring, d->ring_flags); ret = io_uring_register_files(&ring, &fd, 1); if (ret) { if (ret != -EINVAL && ret != -EBADF) fprintf(stderr, "thread file register: %d\n", ret); no_sparse = 1; pthread_barrier_wait(&d->barrier); return NULL; } d->ring_fd = ring.ring_fd; pthread_barrier_wait(&d->barrier); /* wait for MSG */ ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe dst: %d\n", ret); return NULL; } if (cqe->res < 0) { fprintf(stderr, "cqe error dst: %d\n", cqe->res); return NULL; } fd = cqe->res; io_uring_cqe_seen(&ring, cqe); sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fd, d->buf, sizeof(d->buf), 0); sqe->flags |= IOSQE_FIXED_FILE; io_uring_submit(&ring); ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe dst: %d\n", ret); return NULL; } if (cqe->res < 0) { fprintf(stderr, "cqe error dst: %d\n", cqe->res); return NULL; } io_uring_queue_exit(&ring); return NULL; } static int test_remote(struct io_uring *src, int ring_flags) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int fds[2], fd, ret; struct data d; char buf[32]; void *tret; int i; pthread_barrier_init(&d.barrier, NULL, 2); d.ring_flags = ring_flags; pthread_create(&d.thread, NULL, thread_fn, &d); pthread_barrier_wait(&d.barrier); memset(d.buf, 0, sizeof(d.buf)); if (no_sparse) return 0; if (pipe(fds) < 0) { perror("pipe"); return 1; } fd = fds[0]; ret = io_uring_register_files(src, &fd, 1); if (ret) { fprintf(stderr, "register files failed: %d\n", ret); return 1; } for (i = 0; i < ARRAY_SIZE(buf); i++) buf[i] = rand(); sqe = io_uring_get_sqe(src); io_uring_prep_write(sqe, fds[1], buf, sizeof(buf), 0); sqe->user_data = 1; sqe = io_uring_get_sqe(src); io_uring_prep_msg_ring_fd(sqe, d.ring_fd, 0, 0, 0, 0); sqe->user_data = 2; io_uring_submit(src); for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(src, &cqe); if (ret) { fprintf(stderr, "wait_cqe: %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "cqe res %d\n", cqe->res); return 1; } if (cqe->user_data == 1 && cqe->res != sizeof(buf)) { 
fprintf(stderr, "short write %d\n", cqe->res); return 1; } io_uring_cqe_seen(src, cqe); } pthread_join(d.thread, &tret); if (memcmp(buf, d.buf, sizeof(buf))) { fprintf(stderr, "buffers differ\n"); return 1; } close(fds[0]); close(fds[1]); io_uring_unregister_files(src); return 0; } static int test_local(struct io_uring *src, struct io_uring *dst) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int fds[2], fd, ret; char buf[32], dst_buf[32]; int i; fd = -1; ret = io_uring_register_files(dst, &fd, 1); if (ret) { if (ret == -EBADF || ret == -EINVAL) return 0; fprintf(stderr, "register files failed: %d\n", ret); return 1; } if (pipe(fds) < 0) { perror("pipe"); return 1; } fd = fds[0]; ret = io_uring_register_files(src, &fd, 1); if (ret) { fprintf(stderr, "register files failed: %d\n", ret); return 1; } memset(dst_buf, 0, sizeof(dst_buf)); for (i = 0; i < ARRAY_SIZE(buf); i++) buf[i] = rand(); sqe = io_uring_get_sqe(src); io_uring_prep_write(sqe, fds[1], buf, sizeof(buf), 0); sqe->user_data = 1; sqe = io_uring_get_sqe(src); io_uring_prep_msg_ring_fd(sqe, dst->ring_fd, 0, 0, 10, 0); sqe->user_data = 2; io_uring_submit(src); fd = -1; for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(src, &cqe); if (ret) { fprintf(stderr, "wait_cqe: %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "cqe res %d\n", cqe->res); return 1; } if (cqe->user_data == 1 && cqe->res != sizeof(buf)) { fprintf(stderr, "short write %d\n", cqe->res); return 1; } io_uring_cqe_seen(src, cqe); } ret = io_uring_wait_cqe(dst, &cqe); if (ret) { fprintf(stderr, "wait_cqe dst: %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "cqe error dst: %d\n", cqe->res); return 1; } fd = cqe->res; io_uring_cqe_seen(dst, cqe); sqe = io_uring_get_sqe(dst); io_uring_prep_read(sqe, fd, dst_buf, sizeof(dst_buf), 0); sqe->flags |= IOSQE_FIXED_FILE; sqe->user_data = 3; io_uring_submit(dst); ret = io_uring_wait_cqe(dst, &cqe); if (ret) { fprintf(stderr, "wait_cqe dst: %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "cqe error dst: %d\n", cqe->res); return 1; } if (cqe->res != sizeof(dst_buf)) { fprintf(stderr, "short read %d\n", cqe->res); return 1; } if (memcmp(buf, dst_buf, sizeof(buf))) { fprintf(stderr, "buffers differ\n"); return 1; } close(fds[0]); close(fds[1]); io_uring_unregister_files(src); io_uring_unregister_files(dst); return 0; } static int test(int ring_flags) { struct io_uring ring, ring2; int ret; ret = io_uring_queue_init(8, &ring, ring_flags); if (ret) { if (ret == -EINVAL) return 0; fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_queue_init(8, &ring2, ring_flags); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = test_local(&ring, &ring2); if (ret) { fprintf(stderr, "test local failed\n"); return T_EXIT_FAIL; } if (no_msg) return T_EXIT_SKIP; ret = test_remote(&ring, ring_flags); if (ret) { fprintf(stderr, "test_remote failed\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); io_uring_queue_exit(&ring2); return T_EXIT_PASS; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test(0); if (ret != T_EXIT_PASS) { fprintf(stderr, "ring flags 0 failed\n"); return ret; } if (no_msg) return T_EXIT_SKIP; ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN); if (ret != T_EXIT_PASS) { fprintf(stderr, "ring flags defer failed\n"); return ret; } return ret; } liburing-2.6/test/msg-ring-flags.c000066400000000000000000000101611461424365000171400ustar00rootroot00000000000000/* 
SPDX-License-Identifier: MIT */ /* * Description: test ring messaging with flags command * */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define CUSTOM_FLAG 0x42 #define USER_DATA 0x5aa5 #define LEN 0x20 #define ID 0x1 struct data { pthread_barrier_t barrier; int fd; }; static int recv_msg(struct io_uring *ring) { struct io_uring_cqe *cqe; int ret; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return T_EXIT_FAIL; } if (cqe->user_data != USER_DATA) { fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data); return T_EXIT_FAIL; } if (cqe->res != LEN) { fprintf(stderr, "len %x\n", cqe->res); return T_EXIT_FAIL; } if (cqe->flags != CUSTOM_FLAG) { fprintf(stderr, "flags %x\n", cqe->flags); return T_EXIT_FAIL; } return T_EXIT_PASS; } static int send_msg(struct io_uring *ring, int target_fd) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return T_EXIT_FAIL; } io_uring_prep_msg_ring_cqe_flags(sqe, target_fd, LEN, USER_DATA, 0, CUSTOM_FLAG); sqe->user_data = ID; ret = io_uring_submit(ring); if (ret <= 0) { if (ret == -EINVAL) return T_EXIT_SKIP; fprintf(stderr, "sqe submit failed: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); return T_EXIT_FAIL; } if (cqe->res != 0) { if (cqe->res == -EINVAL) return T_EXIT_SKIP; fprintf(stderr, "cqe res %d\n", cqe->res); return T_EXIT_FAIL; } if (cqe->user_data != ID) { fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); return T_EXIT_PASS; } static void *thread_fn(void *data) { struct data *d = data; struct io_uring ring; int ret; ret = io_uring_queue_init(2, &ring, IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_SINGLE_ISSUER); if (ret) { fprintf(stderr, "ring init failed %d\n", ret); pthread_barrier_wait(&d->barrier); return NULL; } d->fd = ring.ring_fd; pthread_barrier_wait(&d->barrier); if (recv_msg(&ring)) return (void *) 1; return NULL; } static int test(int ring_flags) { struct io_uring ring, ring2; pthread_t thread; struct data d; void *ret2; int ret, i; ret = io_uring_queue_init(2, &ring, ring_flags); if (ret) { fprintf(stderr, "io_uring_queue_init failed for ring1: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_queue_init(2, &ring2, ring_flags); if (ret) { fprintf(stderr, "io_uring_queue_init failed for ring2: %d\n", ret); return T_EXIT_FAIL; } ret = send_msg(&ring, ring2.ring_fd); if (ret) { if (ret != T_EXIT_SKIP) fprintf(stderr, "send_msg failed: %d\n", ret); return ret; } ret = recv_msg(&ring2); if (ret) { fprintf(stderr, "recv_msg failed: %d\n", ret); return ret; } for (i = 0; i < 8; i++) { ret = send_msg(&ring, ring2.ring_fd); if (ret) { if (ret != T_EXIT_SKIP) fprintf(stderr, "send_msg failed: %d\n", ret); return ret; } } for (i = 0; i < 8; i++) { ret = recv_msg(&ring2); if (ret) { fprintf(stderr, "recv_msg failed: %d\n", ret); return ret; } } pthread_barrier_init(&d.barrier, NULL, 2); d.fd = -1; pthread_create(&thread, NULL, thread_fn, &d); pthread_barrier_wait(&d.barrier); if (d.fd == -1) return T_EXIT_FAIL; ret = send_msg(&ring, d.fd); if (ret) { fprintf(stderr, "send msg failed: %d\n", ret); return ret; } pthread_join(thread, &ret2); if (ret2) { fprintf(stderr, "Remote test failed\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 
T_EXIT_SKIP; ret = test(0); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test ring_flags 0 failed\n"); return ret; } else if (ret == T_EXIT_SKIP) return ret; ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test ring_flags defer failed\n"); return ret; } return ret; } liburing-2.6/test/msg-ring-overflow.c000066400000000000000000000061511461424365000177130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test ring messaging command * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_msg; static int test(struct io_uring *ring, unsigned dst_flags) { struct io_uring_params p = { }; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring dst; int ret, i, err_ret = T_EXIT_FAIL; p.flags = dst_flags | IORING_SETUP_CQSIZE; p.cq_entries = 4; ret = io_uring_queue_init_params(4, &dst, &p); if (ret) { fprintf(stderr, "Destination ring create failed %d\n", ret); return T_EXIT_FAIL; } for (i = 0; i < 8; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_msg_ring(sqe, dst.ring_fd, 0x10, 0x1234, 0); sqe->user_data = i + 1; } ret = io_uring_submit(ring); if (ret != 8) { /* * Likely an old kernel that doesn't support the opcode, * just skip the test. */ if (ret == 1) { err_ret = T_EXIT_SKIP; no_msg = 1; goto err; } fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 8; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1 ... 8: if (cqe->res == -EINVAL || cqe->res == -EOPNOTSUPP) { no_msg = 1; goto out; } if (cqe->res != 0) { fprintf(stderr, "cqe res %d\n", cqe->res); goto err; } break; case 0x1234: if (cqe->res != 0x10) { fprintf(stderr, "invalid len %x\n", cqe->res); goto err; } break; default: fprintf(stderr, "Invalid user_data\n"); goto err; } io_uring_cqe_seen(ring, cqe); } for (i = 0; i < 8; i++) { ret = io_uring_wait_cqe(&dst, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 0x1234: if (cqe->res != 0x10) { fprintf(stderr, "invalid len %x\n", cqe->res); goto err; } break; default: fprintf(stderr, "Invalid user_data\n"); goto err; } io_uring_cqe_seen(&dst, cqe); } out: io_uring_queue_exit(&dst); return no_msg ? 
T_EXIT_SKIP : T_EXIT_PASS; err: io_uring_queue_exit(&dst); return err_ret; } int main(int argc, char *argv[]) { struct io_uring src; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &src, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = test(&src, 0); if (ret && !no_msg) { fprintf(stderr, "test failed\n"); return ret; } if (no_msg) return T_EXIT_SKIP; ret = test(&src, IORING_SETUP_IOPOLL); if (ret) { fprintf(stderr, "test IOPOLL failed\n"); return ret; } ret = test(&src, IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_SINGLE_ISSUER); if (ret) { fprintf(stderr, "test defer failed\n"); return ret; } ret = test(&src, IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_IOPOLL); if (ret) { fprintf(stderr, "test defer IOPOLL failed\n"); return ret; } return T_EXIT_PASS; } liburing-2.6/test/msg-ring.c000066400000000000000000000217441461424365000160570ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test ring messaging command * */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_msg; static int test_own(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_msg_ring(sqe, ring->ring_fd, 0x10, 0x1234, 0); sqe->user_data = 1; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: if (cqe->res == -EINVAL || cqe->res == -EOPNOTSUPP) { no_msg = 1; return 0; } if (cqe->res != 0) { fprintf(stderr, "cqe res %d\n", cqe->res); return -1; } break; case 0x1234: if (cqe->res != 0x10) { fprintf(stderr, "invalid len %x\n", cqe->res); return -1; } break; default: fprintf(stderr, "Invalid user_data\n"); return -1; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } struct data { struct io_uring *ring; pthread_barrier_t barrier; }; static void *wait_cqe_fn(void *__data) { struct data *d = __data; struct io_uring *ring = d->ring; struct io_uring_cqe *cqe; int ret; pthread_barrier_wait(&d->barrier); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); goto err; } if (cqe->user_data != 0x5aa5) { fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data); goto err; } if (cqe->res != 0x20) { fprintf(stderr, "len %x\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); return NULL; err: io_uring_cqe_seen(ring, cqe); return (void *) (unsigned long) 1; } static int test_remote(struct io_uring *ring, struct io_uring *target) { pthread_t thread; void *tret; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct data d; int ret; d.ring = target; pthread_barrier_init(&d.barrier, NULL, 2); pthread_create(&thread, NULL, wait_cqe_fn, &d); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_msg_ring(sqe, target->ring_fd, 0x20, 0x5aa5, 0); sqe->user_data = 1; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } pthread_barrier_wait(&d.barrier); ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res != 0) { fprintf(stderr, "cqe res %d\n", cqe->res); return -1; } if 
(cqe->user_data != 1) { fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data); return -1; } io_uring_cqe_seen(ring, cqe); pthread_join(thread, &tret); return 0; err: return 1; } static void *remote_submit_fn(void *data) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring *target = data; struct io_uring ring; int ret; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "thread ring setup failed: %d\n", ret); goto err; } sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_msg_ring(sqe, target->ring_fd, 0x20, 0x5aa5, 0); sqe->user_data = 1; ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res != 0 || cqe->user_data != 1) { fprintf(stderr, "invalid cqe\n"); goto err; } io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return NULL; err: return (void *) (unsigned long) 1; } static int test_remote_submit(struct io_uring *target) { struct io_uring_cqe *cqe; pthread_t thread; void *tret; int ret; pthread_create(&thread, NULL, remote_submit_fn, target); ret = io_uring_wait_cqe(target, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res != 0x20) { fprintf(stderr, "cqe res %d\n", cqe->res); return -1; } if (cqe->user_data != 0x5aa5) { fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data); return -1; } io_uring_cqe_seen(target, cqe); pthread_join(thread, &tret); return 0; err: return 1; } static int test_invalid(struct io_uring *ring, bool fixed) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, fd = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return 1; } if (fixed) { ret = io_uring_register_files(ring, &fd, 1); if (ret) { fprintf(stderr, "file register %d\n", ret); return 1; } io_uring_prep_msg_ring(sqe, 0, 0, 0x8989, 0); sqe->flags |= IOSQE_FIXED_FILE; } else { io_uring_prep_msg_ring(sqe, 1, 0, 0x8989, 0); } sqe->user_data = 1; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res != -EBADFD) { fprintf(stderr, "cqe res %d\n", cqe->res); return -1; } io_uring_cqe_seen(ring, cqe); if (fixed) io_uring_unregister_files(ring); return 0; err: if (fixed) io_uring_unregister_files(ring); return 1; } static int test_disabled_ring(struct io_uring *ring, int flags) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring disabled_ring; int ret; flags |= IORING_SETUP_R_DISABLED; ret = io_uring_queue_init(8, &disabled_ring, flags); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } sqe = io_uring_get_sqe(ring); io_uring_prep_msg_ring(sqe, disabled_ring.ring_fd, 0x10, 0x1234, 0); sqe->user_data = 1; ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "sqe submit failed: %d\n", ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); return 1; } if (cqe->res != 0 && cqe->res != -EBADFD) { fprintf(stderr, "cqe res %d\n", cqe->res); return 1; } if (cqe->user_data != 1) { fprintf(stderr, "user_data %llx\n", (long long) cqe->user_data); return 1; } io_uring_cqe_seen(ring, cqe); io_uring_queue_exit(&disabled_ring); return 0; } static int test(int ring_flags) { struct 
io_uring ring, ring2, pring; int ret, i; ret = io_uring_queue_init(8, &ring, ring_flags); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_queue_init(8, &ring2, ring_flags); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_queue_init(8, &pring, ring_flags | IORING_SETUP_IOPOLL); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = test_own(&ring); if (ret) { fprintf(stderr, "test_own failed\n"); return T_EXIT_FAIL; } if (no_msg) return T_EXIT_SKIP; ret = test_own(&pring); if (ret) { fprintf(stderr, "test_own iopoll failed\n"); return T_EXIT_FAIL; } ret = test_invalid(&ring, 0); if (ret) { fprintf(stderr, "test_invalid failed\n"); return T_EXIT_FAIL; } for (i = 0; i < 2; i++) { ret = test_invalid(&ring, 1); if (ret) { fprintf(stderr, "test_invalid fixed failed\n"); return T_EXIT_FAIL; } } ret = test_remote(&ring, &ring2); if (ret) { fprintf(stderr, "test_remote failed\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); io_uring_queue_exit(&pring); if (t_probe_defer_taskrun()) { ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN); if (ret) { fprintf(stderr, "deferred ring setup failed: %d\n", ret); return T_EXIT_FAIL; } ret = test_own(&ring); if (ret) { fprintf(stderr, "test_own deferred failed\n"); return T_EXIT_FAIL; } for (i = 0; i < 2; i++) { ret = test_invalid(&ring, i); if (ret) { fprintf(stderr, "test_invalid(0) deferred failed\n"); return T_EXIT_FAIL; } } ret = test_remote_submit(&ring); if (ret) { fprintf(stderr, "test_remote_submit failed\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); if (test_disabled_ring(&ring2, 0)) { fprintf(stderr, "test_disabled_ring failed\n"); return T_EXIT_FAIL; } if (test_disabled_ring(&ring2, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN)) { fprintf(stderr, "test_disabled_ring defer failed\n"); return T_EXIT_FAIL; } } io_uring_queue_exit(&ring2); return T_EXIT_PASS; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test(0); if (ret != T_EXIT_PASS) { fprintf(stderr, "ring flags 0 failed\n"); return ret; } ret = test(IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_DEFER_TASKRUN); if (ret != T_EXIT_PASS) { fprintf(stderr, "ring flags defer failed\n"); return ret; } return ret; } liburing-2.6/test/multicqes_drain.c000066400000000000000000000215441461424365000175150ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: generic tests for io_uring drain io * * The main idea is to randomly generate different type of sqe to * challenge the drain logic. There are some restrictions for the * generated sqes, details in io_uring maillist: * https://lore.kernel.org/io-uring/39a49b4c-27c2-1035-b250-51daeccaab9b@linux.alibaba.com/ * */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" enum { multi, single, nop, cancel, op_last, }; struct sqe_info { __u8 op; unsigned flags; }; #define max_entry 50 /* * sqe_flags: combination of sqe flags * multi_sqes: record the user_data/index of all the multishot sqes * cnt: how many entries there are in multi_sqes * we can leverage multi_sqes array for cancellation: we randomly pick * up an entry in multi_sqes when form a cancellation sqe. 
* multi_cap: limitation of number of multishot sqes */ static const unsigned sqe_flags[4] = { 0, IOSQE_IO_LINK, IOSQE_IO_DRAIN, IOSQE_IO_LINK | IOSQE_IO_DRAIN }; static int multi_sqes[max_entry], cnt = 0; static int multi_cap = max_entry / 5; static int write_pipe(int pipe, char *str) { int ret; do { errno = 0; ret = write(pipe, str, 3); } while (ret == -1 && errno == EINTR); return ret; } static void read_pipe(int pipe) { char str[4] = {0}; int ret; ret = read(pipe, &str, 3); if (ret < 0) perror("read"); } static int trigger_event(struct io_uring *ring, int p[]) { int ret; if ((ret = write_pipe(p[1], "foo")) != 3) { fprintf(stderr, "bad write return %d\n", ret); return 1; } usleep(1000); io_uring_get_events(ring); read_pipe(p[0]); return 0; } static void io_uring_sqe_prep(int op, struct io_uring_sqe *sqe, unsigned sqe_flags, int arg) { switch (op) { case multi: io_uring_prep_poll_add(sqe, arg, POLLIN); sqe->len |= IORING_POLL_ADD_MULTI; break; case single: io_uring_prep_poll_add(sqe, arg, POLLIN); break; case nop: io_uring_prep_nop(sqe); break; case cancel: io_uring_prep_poll_remove(sqe, arg); break; } sqe->flags = sqe_flags; } static __u8 generate_flags(int sqe_op) { __u8 flags = 0; /* * drain sqe must be put after multishot sqes cancelled */ do { flags = sqe_flags[rand() % 4]; } while ((flags & IOSQE_IO_DRAIN) && cnt); /* * cancel req cannot have drain or link flag */ if (sqe_op == cancel) { flags &= ~(IOSQE_IO_DRAIN | IOSQE_IO_LINK); } /* * avoid below case: * sqe0(multishot, link)->sqe1(nop, link)->sqe2(nop)->sqe3(cancel_sqe0) * sqe3 may execute before sqe0 so that sqe0 isn't cancelled */ if (sqe_op == multi) flags &= ~IOSQE_IO_LINK; return flags; } /* * function to generate opcode of a sqe * several restrictions here: * - cancel all the previous multishot sqes as soon as possible when * we reach high watermark. 
* - ensure there is some multishot sqe when generating a cancel sqe * - ensure a cancel/multshot sqe is not in a linkchain * - ensure number of multishot sqes doesn't exceed multi_cap * - don't generate multishot sqes after high watermark */ static int generate_opcode(int i, int pre_flags) { int sqe_op; int high_watermark = max_entry - max_entry / 5; bool retry0 = false, retry1 = false, retry2 = false; if ((i >= high_watermark) && cnt) { sqe_op = cancel; } else { do { sqe_op = rand() % op_last; retry0 = (sqe_op == cancel) && (!cnt || (pre_flags & IOSQE_IO_LINK)); retry1 = (sqe_op == multi) && ((multi_cap - 1 < 0) || i >= high_watermark); retry2 = (sqe_op == multi) && (pre_flags & IOSQE_IO_LINK); } while (retry0 || retry1 || retry2); } if (sqe_op == multi) multi_cap--; return sqe_op; } static inline void add_multishot_sqe(int index) { multi_sqes[cnt++] = index; } static int remove_multishot_sqe(void) { int ret; int rem_index = rand() % cnt; ret = multi_sqes[rem_index]; multi_sqes[rem_index] = multi_sqes[cnt - 1]; cnt--; return ret; } static int test_generic_drain(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe[max_entry]; struct sqe_info si[max_entry]; int cqe_data[max_entry << 1], cqe_res[max_entry << 1]; int i, j, ret, arg = 0; int pipes[max_entry][2]; int pre_flags = 0; for (i = 0; i < max_entry; i++) { if (pipe(pipes[i]) != 0) { perror("pipe"); return 1; } } srand((unsigned)time(NULL)); for (i = 0; i < max_entry; i++) { sqe[i] = io_uring_get_sqe(ring); if (!sqe[i]) { printf("get sqe failed\n"); goto err; } int sqe_op = generate_opcode(i, pre_flags); __u8 flags = generate_flags(sqe_op); if (sqe_op == cancel) arg = remove_multishot_sqe(); if (sqe_op == multi || sqe_op == single) arg = pipes[i][0]; io_uring_sqe_prep(sqe_op, sqe[i], flags, arg); sqe[i]->user_data = i; si[i].op = sqe_op; si[i].flags = flags; pre_flags = flags; if (sqe_op == multi) add_multishot_sqe(i); } ret = io_uring_submit(ring); if (ret < 0) { printf("sqe submit failed\n"); goto err; } else if (ret < max_entry) { printf("Submitted only %d\n", ret); goto err; } sleep(1); // TODO: randomize event triggering order for (i = 0; i < max_entry; i++) { if (si[i].op != multi && si[i].op != single) continue; if (trigger_event(ring, pipes[i])) goto err; } sleep(1); i = 0; while (!io_uring_peek_cqe(ring, &cqe)) { cqe_data[i] = cqe->user_data; cqe_res[i++] = cqe->res; io_uring_cqe_seen(ring, cqe); } /* * compl_bits is a bit map to record completions. * eg. sqe[0], sqe[1], sqe[2] fully completed * then compl_bits is 000...00111b * */ unsigned long long compl_bits = 0; for (j = 0; j < i; j++) { int index = cqe_data[j]; if ((si[index].flags & IOSQE_IO_DRAIN) && index) { if ((~compl_bits) & ((1ULL << index) - 1)) { printf("drain failed\n"); goto err; } } /* * for multishot sqes, record them only when it is cancelled */ if ((si[index].op != multi) || (cqe_res[j] == -ECANCELED)) compl_bits |= (1ULL << index); } return 0; err: return 1; } static int test_simple_drain(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe[2]; int i, ret; int pipe1[2], pipe2[2]; if (pipe(pipe1) != 0 || pipe(pipe2) != 0) { perror("pipe"); return 1; } for (i = 0; i < 2; i++) { sqe[i] = io_uring_get_sqe(ring); if (!sqe[i]) { printf("get sqe failed\n"); goto err; } } io_uring_prep_poll_multishot(sqe[0], pipe1[0], POLLIN); sqe[0]->user_data = 0; io_uring_prep_poll_add(sqe[1], pipe2[0], POLLIN); sqe[1]->user_data = 1; /* This test relies on multishot poll to trigger events continually. 
* however with IORING_SETUP_DEFER_TASKRUN this will only happen when * triggered with a get_events. Hence we sprinkle get_events whenever * there might be work to process in order to get the same result */ ret = io_uring_submit_and_get_events(ring); if (ret < 0) { printf("sqe submit failed\n"); goto err; } else if (ret < 2) { printf("Submitted only %d\n", ret); goto err; } for (i = 0; i < 2; i++) { if (trigger_event(ring, pipe1)) goto err; } if (trigger_event(ring, pipe2)) goto err; for (i = 0; i < 2; i++) { sqe[i] = io_uring_get_sqe(ring); if (!sqe[i]) { printf("get sqe failed\n"); goto err; } } io_uring_prep_poll_remove(sqe[0], 0); sqe[0]->user_data = 2; io_uring_prep_nop(sqe[1]); sqe[1]->flags |= IOSQE_IO_DRAIN; sqe[1]->user_data = 3; ret = io_uring_submit(ring); if (ret < 0) { printf("sqe submit failed\n"); goto err; } else if (ret < 2) { printf("Submitted only %d\n", ret); goto err; } for (i = 0; i < 6; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { printf("wait completion %d\n", ret); goto err; } if ((i == 5) && (cqe->user_data != 3)) goto err; io_uring_cqe_seen(ring, cqe); } close(pipe1[0]); close(pipe1[1]); close(pipe2[0]); close(pipe2[1]); return 0; err: return 1; } static int test(bool defer_taskrun) { struct io_uring ring; int i, ret; unsigned int flags = 0; if (defer_taskrun) flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN; ret = io_uring_queue_init(1024, &ring, flags); if (ret) { printf("ring setup failed\n"); return T_EXIT_FAIL; } for (i = 0; i < 5; i++) { ret = test_simple_drain(&ring); if (ret) { fprintf(stderr, "test_simple_drain failed\n"); return T_EXIT_FAIL; } } for (i = 0; i < 5; i++) { ret = test_generic_drain(&ring); if (ret) { fprintf(stderr, "test_generic_drain failed\n"); return T_EXIT_FAIL; } } io_uring_queue_exit(&ring); return T_EXIT_PASS; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test(false); if (ret != T_EXIT_PASS) { fprintf(stderr, "%s: test(false) failed\n", argv[0]); return ret; } if (t_probe_defer_taskrun()) { ret = test(true); if (ret != T_EXIT_PASS) { fprintf(stderr, "%s: test(true) failed\n", argv[0]); return ret; } } return ret; } liburing-2.6/test/no-mmap-inval.c000066400000000000000000000015621461424365000170030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test that using SETUP_NO_MMAP with an invalid SQ ring * address fails. * */ #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring_params p = { .sq_entries = 2, .cq_entries = 4, .flags = IORING_SETUP_NO_MMAP, }; struct io_uring ring; void *addr; int ret; if (argc > 1) return T_EXIT_SKIP; t_posix_memalign(&addr, sysconf(_SC_PAGESIZE), 8192); p.cq_off.user_addr = (unsigned long long) (uintptr_t) addr; ret = io_uring_queue_init_params(2, &ring, &p); if (ret == -EINVAL) { /* kernel doesn't support SETUP_NO_MMAP */ return T_EXIT_SKIP; } else if (ret && ret != -EFAULT) { fprintf(stderr, "Got %d, wanted -EFAULT\n", ret); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/nolibc.c000066400000000000000000000022211461424365000155670ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test liburing nolibc functionality. * * Currently, supported architectures are: * 1) x86 * 2) x86-64 * 3) aarch64 * 4) riscv64 * */ #include "helpers.h" #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) && (!defined(__riscv) && __riscv_xlen != 64) /* * This arch doesn't support nolibc. 
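 * Return T_EXIT_SKIP so the test harness reports the test as skipped
 * rather than failed on architectures without nolibc support.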
*/ int main(void) { return T_EXIT_SKIP; } #else /* #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) && (!defined(__riscv) && __riscv_xlen != 64) */ #ifndef CONFIG_NOLIBC #define CONFIG_NOLIBC #endif #include #include #include "../src/lib.h" static int test_get_page_size(void) { long a, b; a = sysconf(_SC_PAGESIZE); b = get_page_size(); if (a != b) { fprintf(stderr, "get_page_size() fails, %ld != %ld", a, b); return -1; } return 0; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test_get_page_size(); if (ret) return T_EXIT_FAIL; return T_EXIT_PASS; } #endif /* #if !defined(__x86_64__) && !defined(__i386__) && !defined(__aarch64__) && (!defined(__riscv) && __riscv_xlen != 64) */ liburing-2.6/test/nop-all-sizes.c000066400000000000000000000031071461424365000170220ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: exercise full filling of SQ and CQ ring * */ #include #include #include #include #include #include #include "liburing.h" #define MAX_ENTRIES 32768 static int fill_nops(struct io_uring *ring) { struct io_uring_sqe *sqe; int filled = 0; do { sqe = io_uring_get_sqe(ring); if (!sqe) break; io_uring_prep_nop(sqe); filled++; } while (1); return filled; } static int test_nops(struct io_uring *ring) { struct io_uring_cqe *cqe; int ret, nr, total = 0, i; nr = fill_nops(ring); ret = io_uring_submit(ring); if (ret != nr) { fprintf(stderr, "submit %d, wanted %d\n", ret, nr); goto err; } total += ret; nr = fill_nops(ring); ret = io_uring_submit(ring); if (ret != nr) { fprintf(stderr, "submit %d, wanted %d\n", ret, nr); goto err; } total += ret; for (i = 0; i < total; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; int ret, depth; if (argc > 1) return 0; depth = 1; while (depth <= MAX_ENTRIES) { ret = io_uring_queue_init(depth, &ring, 0); if (ret) { if (ret == -ENOMEM) break; fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } ret = test_nops(&ring); if (ret) { fprintf(stderr, "test_single_nop failed\n"); return ret; } depth <<= 1; io_uring_queue_exit(&ring); } return 0; } liburing-2.6/test/nop.c000066400000000000000000000063671461424365000151340ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various nop tests * */ #include #include #include #include #include #include #include "liburing.h" #include "test.h" static int seq; static int test_single_nop(struct io_uring *ring, unsigned req_flags) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; bool cqe32 = (ring->flags & IORING_SETUP_CQE32); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->user_data = ++seq; sqe->flags |= req_flags; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (!cqe->user_data) { fprintf(stderr, "Unexpected 0 user_data\n"); goto err; } if (cqe32) { if (cqe->big_cqe[0] != 0) { fprintf(stderr, "Unexpected extra1\n"); goto err; } if (cqe->big_cqe[1] != 0) { fprintf(stderr, "Unexpected extra2\n"); goto err; } } io_uring_cqe_seen(ring, cqe); return 0; err: return 1; } static int test_barrier_nop(struct io_uring *ring, unsigned req_flags) { struct io_uring_cqe *cqe; 
struct io_uring_sqe *sqe; int ret, i; bool cqe32 = (ring->flags & IORING_SETUP_CQE32); for (i = 0; i < 8; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); if (i == 4) sqe->flags = IOSQE_IO_DRAIN; sqe->user_data = ++seq; sqe->flags |= req_flags; } ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } else if (ret < 8) { fprintf(stderr, "Submitted only %d\n", ret); goto err; } for (i = 0; i < 8; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (!cqe->user_data) { fprintf(stderr, "Unexpected 0 user_data\n"); goto err; } if (cqe32) { if (cqe->big_cqe[0] != 0) { fprintf(stderr, "Unexpected extra1\n"); goto err; } if (cqe->big_cqe[1] != 0) { fprintf(stderr, "Unexpected extra2\n"); goto err; } } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } static int test_ring(unsigned flags) { struct io_uring ring; struct io_uring_params p = { }; int ret, i; p.flags = flags; ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { if (ret == -EINVAL) return 0; fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } for (i = 0; i < 1000; i++) { unsigned req_flags = (i & 1) ? IOSQE_ASYNC : 0; ret = test_single_nop(&ring, req_flags); if (ret) { fprintf(stderr, "test_single_nop failed\n"); goto err; } ret = test_barrier_nop(&ring, req_flags); if (ret) { fprintf(stderr, "test_barrier_nop failed\n"); goto err; } } err: io_uring_queue_exit(&ring); return ret; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; FOR_ALL_TEST_CONFIGS { ret = test_ring(IORING_GET_TEST_CONFIG_FLAGS()); if (ret) { fprintf(stderr, "Normal ring test failed: %s\n", IORING_GET_TEST_CONFIG_DESCRIPTION()); return ret; } } return 0; } liburing-2.6/test/nvme.h000066400000000000000000000056331461424365000153050ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Helpers for NVMe uring passthrough commands */ #ifndef LIBURING_NVME_H #define LIBURING_NVME_H #ifdef __cplusplus extern "C" { #endif #include #include /* * If the uapi headers installed on the system lacks nvme uring command * support, use the local version to prevent compilation issues. 
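 * Note: this fallback definition must stay bit-for-bit identical to the
 * kernel's uapi struct nvme_uring_cmd, since the command memory is handed
 * to the NVMe uring_cmd path unchanged.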
*/ #ifndef CONFIG_HAVE_NVME_URING struct nvme_uring_cmd { __u8 opcode; __u8 flags; __u16 rsvd1; __u32 nsid; __u32 cdw2; __u32 cdw3; __u64 metadata; __u64 addr; __u32 metadata_len; __u32 data_len; __u32 cdw10; __u32 cdw11; __u32 cdw12; __u32 cdw13; __u32 cdw14; __u32 cdw15; __u32 timeout_ms; __u32 rsvd2; }; #define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd) #define NVME_URING_CMD_IO_VEC _IOWR('N', 0x81, struct nvme_uring_cmd) #endif /* CONFIG_HAVE_NVME_URING */ #define NVME_DEFAULT_IOCTL_TIMEOUT 0 #define NVME_IDENTIFY_DATA_SIZE 4096 #define NVME_IDENTIFY_CSI_SHIFT 24 #define NVME_IDENTIFY_CNS_NS 0 #define NVME_CSI_NVM 0 enum nvme_admin_opcode { nvme_admin_identify = 0x06, }; enum nvme_io_opcode { nvme_cmd_write = 0x01, nvme_cmd_read = 0x02, }; static int nsid; static __u32 lba_shift; struct nvme_lbaf { __le16 ms; __u8 ds; __u8 rp; }; struct nvme_id_ns { __le64 nsze; __le64 ncap; __le64 nuse; __u8 nsfeat; __u8 nlbaf; __u8 flbas; __u8 mc; __u8 dpc; __u8 dps; __u8 nmic; __u8 rescap; __u8 fpi; __u8 dlfeat; __le16 nawun; __le16 nawupf; __le16 nacwu; __le16 nabsn; __le16 nabo; __le16 nabspf; __le16 noiob; __u8 nvmcap[16]; __le16 npwg; __le16 npwa; __le16 npdg; __le16 npda; __le16 nows; __le16 mssrl; __le32 mcl; __u8 msrc; __u8 rsvd81[11]; __le32 anagrpid; __u8 rsvd96[3]; __u8 nsattr; __le16 nvmsetid; __le16 endgid; __u8 nguid[16]; __u8 eui64[8]; struct nvme_lbaf lbaf[16]; __u8 rsvd192[192]; __u8 vs[3712]; }; static inline int ilog2(uint32_t i) { int log = -1; while (i) { i >>= 1; log++; } return log; } __attribute__((__unused__)) static int nvme_get_info(const char *file) { struct nvme_id_ns ns; int fd, err; __u32 lba_size; fd = open(file, O_RDONLY); if (fd < 0) { perror("file open"); return -errno; } nsid = ioctl(fd, NVME_IOCTL_ID); if (nsid < 0) { close(fd); return -errno; } struct nvme_passthru_cmd cmd = { .opcode = nvme_admin_identify, .nsid = nsid, .addr = (__u64)(uintptr_t)&ns, .data_len = NVME_IDENTIFY_DATA_SIZE, .cdw10 = NVME_IDENTIFY_CNS_NS, .cdw11 = NVME_CSI_NVM << NVME_IDENTIFY_CSI_SHIFT, .timeout_ms = NVME_DEFAULT_IOCTL_TIMEOUT, }; err = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd); if (err) { close(fd); return err; } lba_size = 1 << ns.lbaf[(ns.flbas & 0x0f)].ds; lba_shift = ilog2(lba_size); close(fd); return 0; } #ifdef __cplusplus } #endif #endif liburing-2.6/test/open-close.c000066400000000000000000000115431461424365000163740ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various openat(2) tests * */ #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static int submit_wait(struct io_uring *ring) { struct io_uring_cqe *cqe; int ret; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); return 1; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; } static inline int try_close(struct io_uring *ring, int fd, int slot) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); io_uring_prep_close(sqe, fd); __io_uring_set_target_fixed_file(sqe, slot); return submit_wait(ring); } static int test_close_fixed(void) { struct io_uring ring; struct io_uring_sqe *sqe; int ret, fds[2]; char buf[1]; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return -1; } if (pipe(fds)) { perror("pipe"); return -1; } ret = try_close(&ring, 0, 0); if (ret == -EINVAL) { fprintf(stderr, "close for fixed files is 
not supported\n"); return 0; } else if (ret != -ENXIO) { fprintf(stderr, "no table failed %i\n", ret); return -1; } ret = try_close(&ring, 1, 0); if (ret != -EINVAL) { fprintf(stderr, "set fd failed %i\n", ret); return -1; } ret = io_uring_register_files(&ring, fds, 2); if (ret) { fprintf(stderr, "file_register: %d\n", ret); return ret; } ret = try_close(&ring, 0, 2); if (ret != -EINVAL) { fprintf(stderr, "out of table failed %i\n", ret); return -1; } ret = try_close(&ring, 0, 0); if (ret != 0) { fprintf(stderr, "close failed %i\n", ret); return -1; } sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, 0, buf, sizeof(buf), 0); sqe->flags |= IOSQE_FIXED_FILE; ret = submit_wait(&ring); if (ret != -EBADF) { fprintf(stderr, "read failed %i\n", ret); return -1; } ret = try_close(&ring, 0, 1); if (ret != 0) { fprintf(stderr, "close 2 failed %i\n", ret); return -1; } ret = try_close(&ring, 0, 0); if (ret != -EBADF) { fprintf(stderr, "empty slot failed %i\n", ret); return -1; } close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring); return 0; } static int test_close(struct io_uring *ring, int fd, int is_ring_fd) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_close(sqe, fd); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { if (!(is_ring_fd && ret == -EBADF)) { fprintf(stderr, "wait completion %d\n", ret); goto err; } return ret; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; err: return -1; } static int test_openat(struct io_uring *ring, const char *path, int dfd) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_openat(sqe, dfd, path, O_RDONLY, 0); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; err: return -1; } int main(int argc, char *argv[]) { struct io_uring ring; const char *path, *path_rel; int ret, do_unlink; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } if (argc > 1) { path = "/tmp/.open.close"; path_rel = argv[1]; do_unlink = 0; } else { path = "/tmp/.open.close"; path_rel = ".open.close"; do_unlink = 1; } t_create_file(path, 4096); if (do_unlink) t_create_file(path_rel, 4096); ret = test_openat(&ring, path, -1); if (ret < 0) { if (ret == -EINVAL) { fprintf(stdout, "Open not supported, skipping\n"); goto done; } fprintf(stderr, "test_openat absolute failed: %d\n", ret); goto err; } ret = test_openat(&ring, path_rel, AT_FDCWD); if (ret < 0) { fprintf(stderr, "test_openat relative failed: %d\n", ret); goto err; } ret = test_close(&ring, ret, 0); if (ret) { fprintf(stderr, "test_close normal failed\n"); goto err; } ret = test_close(&ring, ring.ring_fd, 1); if (ret != -EBADF) { fprintf(stderr, "test_close ring_fd failed\n"); goto err; } ret = test_close_fixed(); if (ret) { fprintf(stderr, "test_close_fixed failed\n"); goto err; } done: unlink(path); if (do_unlink) unlink(path_rel); return 0; err: unlink(path); if (do_unlink) unlink(path_rel); return 1; } 
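/*
 * Note: the try_close() helper above builds the direct-descriptor close
 * by hand via the internal __io_uring_set_target_fixed_file() helper so
 * the test can vary the regular fd field independently of the fixed
 * slot. Outside of this test, the public wrapper covers the common
 * case; a minimal sketch, assuming a registered file table:
 *
 *	sqe = io_uring_get_sqe(ring);
 *	io_uring_prep_close_direct(sqe, slot);
 *	ret = submit_wait(ring);
 */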
liburing-2.6/test/open-direct-link.c000066400000000000000000000071061461424365000174740ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: link * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define MAX_FILES 8 #define FNAME ".link.direct" static int test(struct io_uring *ring, int skip_success, int drain, int async) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; char buf[4096]; int ret, i; /* drain and cqe skip are mutually exclusive */ if (skip_success && drain) return 1; sqe = io_uring_get_sqe(ring); io_uring_prep_openat_direct(sqe, AT_FDCWD, FNAME, O_RDONLY, 0, 0); if (!drain) sqe->flags |= IOSQE_IO_LINK; if (skip_success) sqe->flags |= IOSQE_CQE_SKIP_SUCCESS; if (async) sqe->flags |= IOSQE_ASYNC; sqe->user_data = 1; sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, 0, buf, sizeof(buf), 0); sqe->flags |= IOSQE_FIXED_FILE; if (drain) sqe->flags |= IOSQE_IO_DRAIN; else sqe->flags |= IOSQE_IO_LINK; if (async) sqe->flags |= IOSQE_ASYNC; sqe->user_data = 2; sqe = io_uring_get_sqe(ring); io_uring_prep_close_direct(sqe, 0); sqe->user_data = 3; if (skip_success) sqe->flags |= IOSQE_CQE_SKIP_SUCCESS; if (drain) sqe->flags |= IOSQE_IO_DRAIN; if (async) sqe->flags |= IOSQE_ASYNC; ret = io_uring_submit(ring); if (ret != 3) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } if (skip_success) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->user_data != 2) { fprintf(stderr, "Unexpected cqe %lu/%d\n", (unsigned long) cqe->user_data, cqe->res); goto err; } if (cqe->res != sizeof(buf)) { fprintf(stderr, "bad read %d\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); return 0; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } switch (cqe->user_data) { case 1: if (cqe->res) { fprintf(stderr, "bad open %d\n", cqe->res); goto err; } break; case 2: if (cqe->res != sizeof(buf)) { fprintf(stderr, "bad read %d\n", cqe->res); goto err; } break; case 3: if (cqe->res) { fprintf(stderr, "bad close %d\n", cqe->res); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; struct io_uring_params p = { }; int ret, files[MAX_FILES]; if (argc > 1) return 0; ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } if (!(p.features & IORING_FEAT_CQE_SKIP)) return 0; memset(files, -1, sizeof(files)); ret = io_uring_register_files(&ring, files, ARRAY_SIZE(files)); if (ret) { fprintf(stderr, "Failed registering files\n"); return 1; } t_create_file(FNAME, 4096); ret = test(&ring, 0, 0, 0); if (ret) { fprintf(stderr, "test 0 0 0 failed\n"); goto err; } ret = test(&ring, 0, 1, 0); if (ret) { fprintf(stderr, "test 0 1 0 failed\n"); goto err; } ret = test(&ring, 0, 0, 1); if (ret) { fprintf(stderr, "test 0 0 1 failed\n"); goto err; } ret = test(&ring, 0, 1, 1); if (ret) { fprintf(stderr, "test 0 1 1 failed\n"); goto err; } ret = test(&ring, 1, 0, 0); if (ret) { fprintf(stderr, "test 1 0 0 failed\n"); goto err; } ret = test(&ring, 1, 0, 1); if (ret) { fprintf(stderr, "test 1 0 1 failed\n"); goto err; } unlink(FNAME); return 0; err: unlink(FNAME); return 1; } liburing-2.6/test/open-direct-pick.c000066400000000000000000000063631461424365000174710ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * 
Description: run various openat(2) tests * */ #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FDS 800 static int no_direct_pick; static int submit_wait(struct io_uring *ring) { struct io_uring_cqe *cqe; int ret; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); return 1; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; } static inline int try_close(struct io_uring *ring, int slot) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); io_uring_prep_close_direct(sqe, slot); return submit_wait(ring); } static int do_opens(struct io_uring *ring, const char *path, int nr, int expect_enfile) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int i, ret; for (i = 0; i < nr; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_openat_direct(sqe, -1, path, O_RDONLY, 0, 0); sqe->file_index = UINT_MAX; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } } for (i = 0; i < nr; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; if (ret < 0) { if (!expect_enfile || ret != -ENFILE) { printf("open=%d, %d\n", cqe->res, i); goto err; } if (!i && ret == -EINVAL) { no_direct_pick = 1; return 0; } } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } static int test_openat(struct io_uring *ring, const char *path) { int ret, i; /* open all */ ret = do_opens(ring, path, FDS, 0); if (ret) goto err; if (no_direct_pick) return 0; /* now close 100 randomly */ for (i = 0; i < 100; i++) { do { int slot = rand() % FDS; ret = try_close(ring, slot); if (ret == -EBADF) continue; break; } while (1); } /* opening 100 should work, we closed 100 */ ret = do_opens(ring, path, 100, 0); if (ret) goto err; /* we should be full now, expect -ENFILE */ ret = do_opens(ring, path, 1, 1); if (ret) goto err; return ret; err: fprintf(stderr,"%s: err=%d\n", __FUNCTION__, ret); return -1; } int main(int argc, char *argv[]) { struct io_uring ring; const char *path; int ret; if (argc > 1) return 0; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } ret = io_uring_register_files_sparse(&ring, FDS); if (ret ) { if (ret != -EINVAL) { fprintf(stderr, "Sparse file registration failed\n"); return 1; } /* skip, kernel doesn't support sparse file array */ return 0; } path = "/tmp/.open.direct.pick"; t_create_file(path, 4096); ret = test_openat(&ring, path); if (ret < 0) { if (ret == -EINVAL) { fprintf(stdout, "Open not supported, skipping\n"); goto done; } fprintf(stderr, "test_openat absolute failed: %d\n", ret); goto err; } done: unlink(path); return 0; err: unlink(path); return 1; } liburing-2.6/test/openat2.c000066400000000000000000000150671461424365000157050ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various openat2(2) tests * */ #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static int test_openat2(struct io_uring *ring, const char *path, int dfd, bool direct, int fixed_index) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct open_how how; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return -1; } memset(&how, 
0, sizeof(how)); how.flags = O_RDWR; if (!direct) io_uring_prep_openat2(sqe, dfd, path, &how); else io_uring_prep_openat2_direct(sqe, dfd, path, &how, fixed_index); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); return -1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); return -1; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (direct && ret > 0) { close(ret); return -EINVAL; } return ret; } static int test_open_fixed(const char *path, int dfd) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring ring; const char pattern = 0xac; char buffer[] = { 0, 0 }; int i, ret, fd = -1; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return -1; } ret = io_uring_register_files(&ring, &fd, 1); if (ret) { if (ret == -EINVAL || ret == -EBADF) return 0; fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); return -1; } ret = test_openat2(&ring, path, dfd, true, 0); if (ret == -EINVAL) { printf("fixed open isn't supported\n"); return 1; } else if (ret) { fprintf(stderr, "direct open failed %d\n", ret); return -1; } sqe = io_uring_get_sqe(&ring); io_uring_prep_write(sqe, 0, &pattern, 1, 0); sqe->user_data = 1; sqe->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, 0, buffer, 1, 0); sqe->user_data = 2; sqe->flags |= IOSQE_FIXED_FILE; ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "%s: got %d, wanted 2\n", __FUNCTION__, ret); return -1; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); return -1; } if (cqe->res != 1) { fprintf(stderr, "unexpected ret %d\n", cqe->res); return -1; } io_uring_cqe_seen(&ring, cqe); } if (memcmp(&pattern, buffer, 1) != 0) { fprintf(stderr, "buf validation failed\n"); return -1; } io_uring_queue_exit(&ring); return 0; } static int test_open_fixed_fail(const char *path, int dfd) { struct io_uring ring; int ret, fd = -1; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return -1; } ret = test_openat2(&ring, path, dfd, true, 0); if (ret != -ENXIO) { fprintf(stderr, "install into non-existent table, %i\n", ret); return 1; } ret = io_uring_register_files(&ring, &fd, 1); if (ret) { if (ret == -EINVAL || ret == -EBADF) return 0; fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); return -1; } ret = test_openat2(&ring, path, dfd, true, 1); if (ret != -EINVAL) { fprintf(stderr, "install out of bounds, %i\n", ret); return -1; } ret = test_openat2(&ring, path, dfd, true, (1u << 16)); if (ret != -EINVAL) { fprintf(stderr, "install out of bounds or u16 overflow, %i\n", ret); return -1; } ret = test_openat2(&ring, path, dfd, true, (1u << 16) + 1); if (ret != -EINVAL) { fprintf(stderr, "install out of bounds or u16 overflow, %i\n", ret); return -1; } io_uring_queue_exit(&ring); return 0; } static int test_direct_reinstall(const char *path, int dfd) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; char buf[1] = { 0xfa }; struct io_uring ring; int ret, pipe_fds[2]; ssize_t ret2; if (pipe2(pipe_fds, O_NONBLOCK)) { fprintf(stderr, "pipe() failed\n"); return -1; } ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return -1; } ret = io_uring_register_files(&ring, pipe_fds, 2); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); return -1; } /* reinstall into the second slot */ ret = 
test_openat2(&ring, path, dfd, true, 1); if (ret != 0) { fprintf(stderr, "reinstall failed, %i\n", ret); return -1; } /* verify it's reinstalled, first write into the slot... */ sqe = io_uring_get_sqe(&ring); io_uring_prep_write(sqe, 1, buf, sizeof(buf), 0); sqe->flags |= IOSQE_FIXED_FILE; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "sqe submit failed: %d\n", ret); return -1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); return ret; } ret = cqe->res; io_uring_cqe_seen(&ring, cqe); if (ret != 1) { fprintf(stderr, "invalid write %i\n", ret); return -1; } /* ... and make sure nothing has been written to the pipe */ ret2 = read(pipe_fds[0], buf, 1); if (ret2 != 0 && !(ret2 < 0 && errno == EAGAIN)) { fprintf(stderr, "invalid pipe read, %d %d\n", errno, (int)ret2); return -1; } close(pipe_fds[0]); close(pipe_fds[1]); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { struct io_uring ring; const char *path, *path_rel; int ret, do_unlink; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } if (argc > 1) { path = "/tmp/.open.at2"; path_rel = argv[1]; do_unlink = 0; } else { path = "/tmp/.open.at2"; path_rel = ".open.at2"; do_unlink = 1; } t_create_file(path, 4096); if (do_unlink) t_create_file(path_rel, 4096); ret = test_openat2(&ring, path, -1, false, 0); if (ret < 0) { if (ret == -EINVAL) { fprintf(stdout, "openat2 not supported, skipping\n"); goto done; } fprintf(stderr, "test_openat2 absolute failed: %d\n", ret); goto err; } ret = test_openat2(&ring, path_rel, AT_FDCWD, false, 0); if (ret < 0) { fprintf(stderr, "test_openat2 relative failed: %d\n", ret); goto err; } ret = test_open_fixed(path, -1); if (ret > 0) goto done; if (ret) { fprintf(stderr, "test_open_fixed failed\n"); goto err; } ret = test_open_fixed_fail(path, -1); if (ret) { fprintf(stderr, "test_open_fixed_fail failed\n"); goto err; } ret = test_direct_reinstall(path, -1); if (ret) { fprintf(stderr, "test_direct_reinstall failed\n"); goto err; } done: unlink(path); if (do_unlink) unlink(path_rel); return 0; err: unlink(path); if (do_unlink) unlink(path_rel); return 1; } liburing-2.6/test/personality.c000066400000000000000000000073661461424365000167110ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test if personalities work * */ #include #include #include #include #include #include #include "liburing.h" #define FNAME "/tmp/.tmp.access" #define USE_UID 1000 static int no_personality; static int open_file(struct io_uring *ring, int cred_id, int with_link) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i, to_submit = 1; if (with_link) { sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; to_submit++; } sqe = io_uring_get_sqe(ring); io_uring_prep_openat(sqe, -1, FNAME, O_RDONLY, 0); sqe->user_data = 2; if (cred_id != -1) sqe->personality = cred_id; ret = io_uring_submit(ring); if (ret != to_submit) { fprintf(stderr, "submit got: %d\n", ret); goto err; } for (i = 0; i < to_submit; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); } err: return ret; } static int test_personality(struct io_uring *ring) { int ret, cred_id; ret = io_uring_register_personality(ring); if (ret < 0) { if (ret == -EINVAL) { fprintf(stdout, "Personalities not supported, skipping\n"); no_personality = 1; goto 
out; } fprintf(stderr, "register_personality: %d\n", ret); goto err; } cred_id = ret; /* create file only owner can open */ ret = open(FNAME, O_RDONLY | O_CREAT, 0600); if (ret < 0) { perror("open"); goto err; } close(ret); /* verify we can open it */ ret = open_file(ring, -1, 0); if (ret < 0) { fprintf(stderr, "current open got: %d\n", ret); goto err; } if (seteuid(USE_UID) < 0) { fprintf(stdout, "Can't switch to UID %u, skipping\n", USE_UID); goto out; } /* verify we can't open it with current credentials */ ret = open_file(ring, -1, 0); if (ret != -EACCES) { fprintf(stderr, "open got: %d\n", ret); goto err; } /* verify we can open with registered credentials */ ret = open_file(ring, cred_id, 0); if (ret < 0) { fprintf(stderr, "credential open: %d\n", ret); goto err; } close(ret); /* verify we can open with registered credentials and as a link */ ret = open_file(ring, cred_id, 1); if (ret < 0) { fprintf(stderr, "credential open: %d\n", ret); goto err; } if (seteuid(0)) perror("seteuid"); ret = io_uring_unregister_personality(ring, cred_id); if (ret) { fprintf(stderr, "register_personality: %d\n", ret); goto err; } out: unlink(FNAME); return 0; err: unlink(FNAME); return 1; } static int test_invalid_personality(struct io_uring *ring) { int ret; ret = open_file(ring, 2, 0); if (ret != -EINVAL) { fprintf(stderr, "invalid personality got: %d\n", ret); goto err; } return 0; err: return 1; } static int test_invalid_unregister(struct io_uring *ring) { int ret; ret = io_uring_unregister_personality(ring, 2); if (ret != -EINVAL) { fprintf(stderr, "invalid personality unregister got: %d\n", ret); goto err; } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return 0; if (geteuid()) { fprintf(stderr, "Not root, skipping\n"); return 0; } ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } ret = test_personality(&ring); if (ret) { fprintf(stderr, "test_personality failed\n"); return ret; } if (no_personality) return 0; ret = test_invalid_personality(&ring); if (ret) { fprintf(stderr, "test_invalid_personality failed\n"); return ret; } ret = test_invalid_unregister(&ring); if (ret) { fprintf(stderr, "test_invalid_unregister failed\n"); return ret; } return 0; } liburing-2.6/test/pipe-bug.c000066400000000000000000000037661461424365000160500ustar00rootroot00000000000000// SPDX-License-Identifier: MIT /* * Description: tests bug fixed in * "io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL" * * See: https://github.com/axboe/liburing/issues/665 */ #include #include #include #include "helpers.h" #include "liburing.h" #define CHECK(x) \ do { \ if (!(x)) { \ fprintf(stderr, "%s:%d %s failed\n", __FILE__, __LINE__, #x); \ return -1; \ } \ } while (0) static int pipe_bug(void) { struct io_uring_params p; struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; char buf[1024]; int fds[2]; struct __kernel_timespec to = { .tv_sec = 1 }; CHECK(pipe(fds) == 0); memset(&p, 0, sizeof(p)); CHECK(t_create_ring_params(8, &ring, &p) == 0); /* WRITE */ sqe = io_uring_get_sqe(&ring); CHECK(sqe); io_uring_prep_write(sqe, fds[1], "foobar", strlen("foobar"), 0); /* or -1 */ CHECK(io_uring_submit(&ring) == 1); CHECK(io_uring_wait_cqe(&ring, &cqe) == 0); io_uring_cqe_seen(&ring, cqe); /* CLOSE */ sqe = io_uring_get_sqe(&ring); CHECK(sqe); io_uring_prep_close(sqe, fds[1]); CHECK(io_uring_submit(&ring) == 1); CHECK(io_uring_wait_cqe_timeout(&ring, &cqe, &to) == 0); io_uring_cqe_seen(&ring, 
cqe); /* READ */ sqe = io_uring_get_sqe(&ring); CHECK(sqe); io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); /* or -1 */ CHECK(io_uring_submit(&ring) == 1); CHECK(io_uring_wait_cqe_timeout(&ring, &cqe, &to) == 0); io_uring_cqe_seen(&ring, cqe); memset(buf, 0, sizeof(buf)); /* READ */ sqe = io_uring_get_sqe(&ring); CHECK(sqe); io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); /* or -1 */ CHECK(io_uring_submit(&ring) == 1); CHECK(io_uring_wait_cqe_timeout(&ring, &cqe, &to) == 0); io_uring_cqe_seen(&ring, cqe); close(fds[0]); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { int i; if (argc > 1) return T_EXIT_SKIP; for (i = 0; i < 10000; i++) { if (pipe_bug()) return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/pipe-eof.c000066400000000000000000000026461461424365000160400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test that closed pipe reads returns 0, instead of waiting for more * data. */ #include #include #include #include #include #include #include "liburing.h" #define BUFSIZE 512 struct data { char *str; int fds[2]; }; static void *t(void *data) { struct data *d = data; int ret; strcpy(d->str, "This is a test string"); ret = write(d->fds[1], d->str, strlen(d->str)); close(d->fds[1]); if (ret < 0) perror("write"); return NULL; } int main(int argc, char *argv[]) { static char buf[BUFSIZE]; struct io_uring ring; pthread_t thread; struct data d; int ret; if (pipe(d.fds) < 0) { perror("pipe"); return 1; } d.str = buf; io_uring_queue_init(8, &ring, 0); pthread_create(&thread, NULL, t, &d); while (1) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, d.fds[0], buf, BUFSIZE, 0); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "Read error: %s\n", strerror(-cqe->res)); return 1; } if (cqe->res == 0) break; io_uring_cqe_seen(&ring, cqe); } pthread_join(thread, NULL); io_uring_queue_exit(&ring); return 0; } liburing-2.6/test/pipe-reuse.c000066400000000000000000000041021461424365000163770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Check split up read is handled correctly */ #include #include #include #include #include #include #include "liburing.h" #define BUFSIZE 16384 #define BUFFERS 16 int main(int argc, char *argv[]) { char buf[BUFSIZE], wbuf[BUFSIZE]; struct iovec iov[BUFFERS]; struct io_uring_params p = { }; struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, i, fds[2]; void *ptr; if (pipe(fds) < 0) { perror("pipe"); return 1; } ptr = buf; for (i = 0; i < BUFFERS; i++) { unsigned bsize = BUFSIZE / BUFFERS; iov[i].iov_base = ptr; iov[i].iov_len = bsize; ptr += bsize; } ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { fprintf(stderr, "queue_init: %d\n", ret); return 1; } if (!(p.features & IORING_FEAT_SUBMIT_STABLE)) { fprintf(stdout, "FEAT_SUBMIT_STABLE not there, skipping\n"); return 0; } ptr = wbuf; memset(ptr, 0x11, sizeof(wbuf) / 2); ptr += sizeof(wbuf) / 2; memset(ptr, 0x22, sizeof(wbuf) / 2); ret = write(fds[1], wbuf, sizeof(wbuf) / 2); if (ret != sizeof(wbuf) / 2) { fprintf(stderr, "Bad write\n"); ret = 1; goto err; } sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, fds[0], iov, BUFFERS, 0); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return 1; } for (i = 0; i < BUFFERS; i++) 
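/*
 * Poison the iovecs while the readv is still in flight: with
 * IORING_FEAT_SUBMIT_STABLE (checked above) the kernel took a stable
 * copy at submit time, so the pending read must not observe these
 * values.
 */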
{ iov[i].iov_base = NULL; iov[i].iov_len = 1000000; } ret = write(fds[1], ptr, sizeof(wbuf) / 2); if (ret != sizeof(wbuf) / 2) { fprintf(stderr, "Bad write\n"); ret = 1; goto err; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "Read error: %s\n", strerror(-cqe->res)); return 1; } else if (cqe->res != sizeof(wbuf)) { /* ignore short read, not a failure */ goto err; } io_uring_cqe_seen(&ring, cqe); ret = memcmp(wbuf, buf, sizeof(wbuf)); if (ret) fprintf(stderr, "Read data mismatch\n"); err: io_uring_queue_exit(&ring); return ret; } liburing-2.6/test/poll-cancel-all.c000066400000000000000000000227071461424365000172730ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Test IORING_ASYNC_CANCEL_{ALL,FD} * */ #include #include #include #include #include #include #include "liburing.h" static int no_cancel_flags; static int test1(struct io_uring *ring, int *fd, int fixed) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, i, __fd = fd[0]; if (fixed) __fd = 0; if (fixed) { ret = io_uring_register_files(ring, fd, 1); if (ret) { fprintf(stderr, "failed file register %d\n", ret); return 1; } } for (i = 0; i < 8; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return 1; } io_uring_prep_poll_add(sqe, __fd, POLLIN); sqe->user_data = i + 1; if (fixed) sqe->flags |= IOSQE_FIXED_FILE; } ret = io_uring_submit(ring); if (ret < 8) { fprintf(stderr, "sqe submit failed: %d\n", ret); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return 1; } /* * Mark CANCEL_ALL to cancel all matching the key, and use * CANCEL_FD to cancel requests matching the specified fd. * This should cancel all the pending poll requests on the pipe * input. */ io_uring_prep_cancel(sqe, 0, IORING_ASYNC_CANCEL_ALL); sqe->cancel_flags |= IORING_ASYNC_CANCEL_FD; if (fixed) sqe->cancel_flags |= IORING_ASYNC_CANCEL_FD_FIXED; sqe->fd = __fd; sqe->user_data = 100; ret = io_uring_submit(ring); if (ret < 1) { fprintf(stderr, "child: sqe submit failed: %d\n", ret); return 1; } for (i = 0; i < 9; i++) { if (no_cancel_flags) break; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait=%d\n", ret); return 1; } switch (cqe->user_data) { case 100: if (cqe->res == -EINVAL) { no_cancel_flags = 1; break; } if (cqe->res != 8) { fprintf(stderr, "canceled %d\n", cqe->res); return 1; } break; case 1 ... 8: if (cqe->res != -ECANCELED) { fprintf(stderr, "poll res %d\n", cqe->res); return 1; } break; default: fprintf(stderr, "invalid user_data %lu\n", (unsigned long) cqe->user_data); return 1; } io_uring_cqe_seen(ring, cqe); } if (fixed) io_uring_unregister_files(ring); return 0; } static int test2(struct io_uring *ring, int *fd) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, i, fd2[2]; if (pipe(fd2) < 0) { perror("pipe"); return 1; } for (i = 0; i < 8; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } if (!(i & 1)) io_uring_prep_poll_add(sqe, fd[0], POLLIN); else io_uring_prep_poll_add(sqe, fd2[0], POLLIN); sqe->user_data = i & 1; } ret = io_uring_submit(ring); if (ret < 8) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } /* * Mark CANCEL_ALL to cancel all matching the key, and use * CANCEL_FD to cancel requests matching the specified fd. 
* This should cancel all the pending poll requests on the pipe * input. */ io_uring_prep_cancel(sqe, 0, IORING_ASYNC_CANCEL_ALL); sqe->cancel_flags |= IORING_ASYNC_CANCEL_FD; sqe->fd = fd[0]; sqe->user_data = 100; ret = io_uring_submit(ring); if (ret < 1) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 5; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait=%d\n", ret); goto err; } switch (cqe->user_data) { case 100: if (cqe->res != 4) { fprintf(stderr, "canceled %d\n", cqe->res); goto err; } break; case 0: if (cqe->res != -ECANCELED) { fprintf(stderr, "poll res %d\n", cqe->res); goto err; } break; default: fprintf(stderr, "invalid user_data %lu\n", (unsigned long) cqe->user_data); goto err; } io_uring_cqe_seen(ring, cqe); } usleep(1000); /* * Should not have any pending CQEs now */ ret = io_uring_peek_cqe(ring, &cqe); if (!ret) { fprintf(stderr, "Unexpected extra cancel cqe\n"); goto err; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } /* * Mark CANCEL_ALL to cancel all matching the key, and use * CANCEL_FD to cancel requests matching the specified fd. * This should cancel all the pending poll requests on the pipe * input. */ io_uring_prep_cancel(sqe, 0, IORING_ASYNC_CANCEL_ALL); sqe->cancel_flags |= IORING_ASYNC_CANCEL_FD; sqe->fd = fd2[0]; sqe->user_data = 100; ret = io_uring_submit(ring); if (ret < 1) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 5; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait=%d\n", ret); goto err; } switch (cqe->user_data) { case 100: if (cqe->res != 4) { fprintf(stderr, "canceled %d\n", cqe->res); goto err; } break; case 1: if (cqe->res != -ECANCELED) { fprintf(stderr, "poll res %d\n", cqe->res); goto err; } break; default: fprintf(stderr, "invalid user_data %lu\n", (unsigned long) cqe->user_data); goto err; } io_uring_cqe_seen(ring, cqe); } close(fd2[0]); close(fd2[1]); return 0; err: close(fd2[0]); close(fd2[1]); return 1; } static int test3(struct io_uring *ring, int *fd) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, i, fd2[2]; if (pipe(fd2) < 0) { perror("pipe"); return 1; } for (i = 0; i < 8; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } if (!(i & 1)) { io_uring_prep_poll_add(sqe, fd[0], POLLIN); sqe->flags |= IOSQE_ASYNC; } else io_uring_prep_poll_add(sqe, fd2[0], POLLIN); sqe->user_data = i & 1; } ret = io_uring_submit(ring); if (ret < 8) { fprintf(stderr, "child: sqe submit failed: %d\n", ret); goto err; } usleep(10000); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } /* * Mark CANCEL_ALL to cancel all matching the key, and use * CANCEL_FD to cancel requests matching the specified fd. * This should cancel all the pending poll requests on the pipe * input. 
*/ io_uring_prep_cancel(sqe, 0, IORING_ASYNC_CANCEL_ALL); sqe->cancel_flags |= IORING_ASYNC_CANCEL_ANY; sqe->fd = 0; sqe->user_data = 100; ret = io_uring_submit(ring); if (ret < 1) { fprintf(stderr, "child: sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 9; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait=%d\n", ret); goto err; } switch (cqe->user_data) { case 100: if (cqe->res != 8) { fprintf(stderr, "canceled %d\n", cqe->res); goto err; } break; case 0: case 1: if (cqe->res != -ECANCELED) { fprintf(stderr, "poll res %d\n", cqe->res); goto err; } break; default: fprintf(stderr, "invalid user_data %lu\n", (unsigned long) cqe->user_data); goto err; } io_uring_cqe_seen(ring, cqe); } close(fd2[0]); close(fd2[1]); return 0; err: close(fd2[0]); close(fd2[1]); return 1; } static int test4(struct io_uring *ring, int *fd) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; char buffer[32]; int ret, i; for (i = 0; i < 8; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_read(sqe, fd[0], &buffer, sizeof(buffer), 0); sqe->flags |= IOSQE_ASYNC; sqe->user_data = i + 1; } ret = io_uring_submit(ring); if (ret < 8) { fprintf(stderr, "child: sqe submit failed: %d\n", ret); goto err; } usleep(10000); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } /* * Mark CANCEL_ALL to cancel all matching the key, and use * CANCEL_FD to cancel requests matching the specified fd. * This should cancel all the pending poll requests on the pipe * input. */ io_uring_prep_cancel(sqe, 0, IORING_ASYNC_CANCEL_ALL); sqe->cancel_flags |= IORING_ASYNC_CANCEL_ANY; sqe->fd = 0; sqe->user_data = 100; ret = io_uring_submit(ring); if (ret < 1) { fprintf(stderr, "child: sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < 9; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait=%d\n", ret); goto err; } switch (cqe->user_data) { case 100: if (cqe->res != 8) { fprintf(stderr, "canceled %d\n", cqe->res); goto err; } break; case 1 ... 
8: if (cqe->res != -ECANCELED) { fprintf(stderr, "poll res %d\n", cqe->res); goto err; } break; default: fprintf(stderr, "invalid user_data %lu\n", (unsigned long) cqe->user_data); goto err; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; int ret, fd[2]; if (argc > 1) return 0; if (pipe(fd) < 0) { perror("pipe"); return 1; } ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } ret = test1(&ring, fd, 0); if (ret) { fprintf(stderr, "test1 failed\n"); return ret; } if (no_cancel_flags) return 0; ret = test1(&ring, fd, 1); if (ret) { fprintf(stderr, "test1 fixed failed\n"); return ret; } ret = test2(&ring, fd); if (ret) { fprintf(stderr, "test2 failed\n"); return ret; } ret = test3(&ring, fd); if (ret) { fprintf(stderr, "test3 failed\n"); return ret; } ret = test4(&ring, fd); if (ret) { fprintf(stderr, "test4 failed\n"); return ret; } return 0; } liburing-2.6/test/poll-cancel-ton.c000066400000000000000000000050401461424365000173120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test massive amounts of poll with cancel * */ #include #include #include #include #include #include #include #include #include #include "liburing.h" #define POLL_COUNT 30000 static void *sqe_index[POLL_COUNT]; static int reap_events(struct io_uring *ring, unsigned nr_events, int nowait) { struct io_uring_cqe *cqe; int i, ret = 0; for (i = 0; i < nr_events; i++) { if (!i && !nowait) ret = io_uring_wait_cqe(ring, &cqe); else ret = io_uring_peek_cqe(ring, &cqe); if (ret) { if (ret != -EAGAIN) fprintf(stderr, "cqe peek failed: %d\n", ret); break; } io_uring_cqe_seen(ring, cqe); } return i ? i : ret; } static int del_polls(struct io_uring *ring, int fd, int nr) { int batch, i, ret; struct io_uring_sqe *sqe; while (nr) { batch = 1024; if (batch > nr) batch = nr; for (i = 0; i < batch; i++) { void *data; sqe = io_uring_get_sqe(ring); data = sqe_index[lrand48() % nr]; io_uring_prep_poll_remove(sqe, (__u64)(uintptr_t)data); } ret = io_uring_submit(ring); if (ret != batch) { fprintf(stderr, "%s: failed submit, %d\n", __FUNCTION__, ret); return 1; } nr -= batch; ret = reap_events(ring, 2 * batch, 0); } return 0; } static int add_polls(struct io_uring *ring, int fd, int nr) { int batch, i, count, ret; struct io_uring_sqe *sqe; count = 0; while (nr) { batch = 1024; if (batch > nr) batch = nr; for (i = 0; i < batch; i++) { sqe = io_uring_get_sqe(ring); io_uring_prep_poll_add(sqe, fd, POLLIN); sqe_index[count++] = sqe; sqe->user_data = (unsigned long) sqe; } ret = io_uring_submit(ring); if (ret != batch) { fprintf(stderr, "%s: failed submit, %d\n", __FUNCTION__, ret); return 1; } nr -= batch; reap_events(ring, batch, 1); } return 0; } int main(int argc, char *argv[]) { struct io_uring ring; struct io_uring_params p = { }; int pipe1[2]; int ret; if (argc > 1) return 0; if (pipe(pipe1) != 0) { perror("pipe"); return 1; } p.flags = IORING_SETUP_CQSIZE; p.cq_entries = 16384; ret = io_uring_queue_init_params(1024, &ring, &p); if (ret) { if (ret == -EINVAL) { fprintf(stdout, "No CQSIZE, trying without\n"); ret = io_uring_queue_init(1024, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } } } add_polls(&ring, pipe1[0], 30000); del_polls(&ring, pipe1[0], 30000); io_uring_queue_exit(&ring); return 0; } liburing-2.6/test/poll-cancel.c000066400000000000000000000107651461424365000165260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * 
Description: test io_uring poll cancel handling * */ #include #include #include #include #include #include #include #include #include #include "liburing.h" struct poll_data { unsigned is_poll; unsigned is_cancel; }; static void sig_alrm(int sig) { fprintf(stderr, "Timed out!\n"); exit(1); } static int test_poll_cancel(void) { struct io_uring ring; int pipe1[2]; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct poll_data *pd, pds[2]; struct sigaction act; int ret; if (pipe(pipe1) != 0) { perror("pipe"); return 1; } ret = io_uring_queue_init(2, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } memset(&act, 0, sizeof(act)); act.sa_handler = sig_alrm; act.sa_flags = SA_RESTART; sigaction(SIGALRM, &act, NULL); alarm(1); sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return 1; } io_uring_prep_poll_add(sqe, pipe1[0], POLLIN); pds[0].is_poll = 1; pds[0].is_cancel = 0; io_uring_sqe_set_data(sqe, &pds[0]); ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed\n"); return 1; } sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return 1; } pds[1].is_poll = 0; pds[1].is_cancel = 1; io_uring_prep_poll_remove(sqe, (__u64)(uintptr_t)&pds[0]); io_uring_sqe_set_data(sqe, &pds[1]); ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "wait cqe failed: %d\n", ret); return 1; } pd = io_uring_cqe_get_data(cqe); if (pd->is_poll && cqe->res != -ECANCELED) { fprintf(stderr ,"sqe (add=%d/remove=%d) failed with %ld\n", pd->is_poll, pd->is_cancel, (long) cqe->res); return 1; } else if (pd->is_cancel && cqe->res) { fprintf(stderr, "sqe (add=%d/remove=%d) failed with %ld\n", pd->is_poll, pd->is_cancel, (long) cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "wait_cqe: %d\n", ret); return 1; } pd = io_uring_cqe_get_data(cqe); if (pd->is_poll && cqe->res != -ECANCELED) { fprintf(stderr, "sqe (add=%d/remove=%d) failed with %ld\n", pd->is_poll, pd->is_cancel, (long) cqe->res); return 1; } else if (pd->is_cancel && cqe->res) { fprintf(stderr, "sqe (add=%d/remove=%d) failed with %ld\n", pd->is_poll, pd->is_cancel, (long) cqe->res); return 1; } close(pipe1[0]); close(pipe1[1]); io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return 0; } static int __test_poll_cancel_with_timeouts(void) { struct __kernel_timespec ts = { .tv_sec = 10, }; struct io_uring ring, ring2; struct io_uring_sqe *sqe; int ret, off_nr = 1000; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } ret = io_uring_queue_init(1, &ring2, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } /* test timeout-offset triggering path during cancellation */ sqe = io_uring_get_sqe(&ring); io_uring_prep_timeout(sqe, &ts, off_nr, 0); /* poll ring2 to trigger cancellation on exit() */ sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, ring2.ring_fd, POLLIN); sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(&ring); io_uring_prep_link_timeout(sqe, &ts, 0); ret = io_uring_submit(&ring); if (ret != 3) { fprintf(stderr, "sqe submit failed\n"); return 1; } /* just drop all rings/etc. 
intact, exit() will clean them up */ return 0; } static int test_poll_cancel_with_timeouts(void) { int ret; pid_t p; p = fork(); if (p == -1) { fprintf(stderr, "fork() failed\n"); return 1; } if (p == 0) { ret = __test_poll_cancel_with_timeouts(); exit(ret); } else { int wstatus; if (waitpid(p, &wstatus, 0) == (pid_t)-1) { perror("waitpid()"); return 1; } if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus)) { fprintf(stderr, "child failed %i\n", WEXITSTATUS(wstatus)); return 1; } } return 0; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test_poll_cancel(); if (ret) { fprintf(stderr, "test_poll_cancel failed\n"); return -1; } ret = test_poll_cancel_with_timeouts(); if (ret) { fprintf(stderr, "test_poll_cancel_with_timeouts failed\n"); return -1; } return 0; } liburing-2.6/test/poll-link.c000066400000000000000000000106431461424365000162310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static int recv_thread_ready = 0; static int recv_thread_done = 0; static void signal_var(int *var) { pthread_mutex_lock(&mutex); *var = 1; pthread_cond_signal(&cond); pthread_mutex_unlock(&mutex); } static void wait_for_var(int *var) { pthread_mutex_lock(&mutex); while (!*var) pthread_cond_wait(&cond, &mutex); pthread_mutex_unlock(&mutex); } struct data { unsigned expected[2]; unsigned is_mask[2]; unsigned long timeout; unsigned short port; unsigned int addr; int stop; }; static void *send_thread(void *arg) { struct sockaddr_in addr; struct data *data = arg; int s0; wait_for_var(&recv_thread_ready); s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); assert(s0 != -1); addr.sin_family = AF_INET; addr.sin_port = data->port; addr.sin_addr.s_addr = data->addr; if (connect(s0, (struct sockaddr*)&addr, sizeof(addr)) != -1) wait_for_var(&recv_thread_done); close(s0); return 0; } static void *recv_thread(void *arg) { struct sockaddr_in addr = { }; struct data *data = arg; struct io_uring_sqe *sqe; struct io_uring ring; int i, ret; ret = io_uring_queue_init(8, &ring, 0); assert(ret == 0); int s0 = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); assert(s0 != -1); int32_t val = 1; ret = setsockopt(s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); assert(ret != -1); ret = setsockopt(s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); assert(ret != -1); addr.sin_family = AF_INET; data->addr = inet_addr("127.0.0.1"); addr.sin_addr.s_addr = data->addr; if (t_bind_ephemeral_port(s0, &addr)) { perror("bind"); data->stop = 1; signal_var(&recv_thread_ready); goto err; } data->port = addr.sin_port; ret = listen(s0, 128); assert(ret != -1); signal_var(&recv_thread_ready); sqe = io_uring_get_sqe(&ring); assert(sqe != NULL); io_uring_prep_poll_add(sqe, s0, POLLIN | POLLHUP | POLLERR); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); assert(sqe != NULL); struct __kernel_timespec ts; ts.tv_sec = data->timeout / 1000000000; ts.tv_nsec = data->timeout % 1000000000; io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 2; ret = io_uring_submit(&ring); assert(ret == 2); for (i = 0; i < 2; i++) { struct io_uring_cqe *cqe; int idx; if (io_uring_wait_cqe(&ring, &cqe)) { fprintf(stderr, "wait cqe failed\n"); goto err; } idx = cqe->user_data - 1; if (data->is_mask[idx] && !(data->expected[idx] & cqe->res)) { 
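/*
 * Mask-type expectation failed: expected[idx] is a poll mask here,
 * and at least one of its bits should have been set in cqe->res.
 */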
fprintf(stderr, "cqe %" PRIu64 " got %x, wanted mask %x\n", (uint64_t) cqe->user_data, cqe->res, data->expected[idx]); goto err; } else if (!data->is_mask[idx] && cqe->res != data->expected[idx]) { fprintf(stderr, "cqe %" PRIu64 " got %d, wanted %d\n", (uint64_t) cqe->user_data, cqe->res, data->expected[idx]); goto err; } io_uring_cqe_seen(&ring, cqe); } signal_var(&recv_thread_done); close(s0); io_uring_queue_exit(&ring); return NULL; err: signal_var(&recv_thread_done); close(s0); io_uring_queue_exit(&ring); return (void *) 1; } static int test_poll_timeout(int do_connect, unsigned long timeout) { pthread_t t1, t2; struct data d; void *tret; int ret = 0; recv_thread_ready = 0; recv_thread_done = 0; memset(&d, 0, sizeof(d)); d.timeout = timeout; if (!do_connect) { d.expected[0] = -ECANCELED; d.expected[1] = -ETIME; } else { d.expected[0] = POLLIN; d.is_mask[0] = 1; d.expected[1] = -ECANCELED; } pthread_create(&t1, NULL, recv_thread, &d); if (do_connect) pthread_create(&t2, NULL, send_thread, &d); pthread_join(t1, &tret); if (tret) ret++; if (do_connect) { pthread_join(t2, &tret); if (tret) ret++; } return ret; } int main(int argc, char *argv[]) { if (argc > 1) return 0; srand(getpid()); if (test_poll_timeout(0, 200000000)) { fprintf(stderr, "poll timeout 0 failed\n"); return 1; } if (test_poll_timeout(1, 1000000000)) { fprintf(stderr, "poll timeout 1 failed\n"); return 1; } return 0; } liburing-2.6/test/poll-many.c000066400000000000000000000101761461424365000162410ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test many files being polled for * */ #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define NFILES 5000 #define BATCH 500 #define NLOOPS 1000 #define RING_SIZE 512 struct p { int fd[2]; int triggered; }; static struct p p[NFILES]; static int arm_poll(struct io_uring *ring, int off) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "failed getting sqe\n"); return 1; } io_uring_prep_poll_add(sqe, p[off].fd[0], POLLIN); sqe->user_data = off; return 0; } static int reap_polls(struct io_uring *ring) { struct io_uring_cqe *cqe; int i, ret, off; char c; for (i = 0; i < BATCH; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return ret; } off = cqe->user_data; p[off].triggered = 0; ret = read(p[off].fd[0], &c, 1); if (ret != 1) { fprintf(stderr, "read got %d/%d\n", ret, errno); break; } if (arm_poll(ring, off)) break; io_uring_cqe_seen(ring, cqe); } if (i != BATCH) { fprintf(stderr, "gave up at %d\n", i); return 1; } ret = io_uring_submit(ring); if (ret != BATCH) { fprintf(stderr, "submitted %d, %d\n", ret, BATCH); return 1; } return 0; } static int trigger_polls(void) { char c = 89; int i, ret; for (i = 0; i < BATCH; i++) { int off; do { off = rand() % NFILES; if (!p[off].triggered) break; } while (1); p[off].triggered = 1; ret = write(p[off].fd[1], &c, 1); if (ret != 1) { fprintf(stderr, "write got %d/%d\n", ret, errno); return 1; } } return 0; } static int arm_polls(struct io_uring *ring) { int ret, to_arm = NFILES, i, off; off = 0; while (to_arm) { int this_arm; this_arm = to_arm; if (this_arm > RING_SIZE) this_arm = RING_SIZE; for (i = 0; i < this_arm; i++) { if (arm_poll(ring, off)) { fprintf(stderr, "arm failed at %d\n", off); return 1; } off++; } ret = io_uring_submit(ring); if (ret != this_arm) { fprintf(stderr, "submitted %d, %d\n", ret, this_arm); return 1; } to_arm -= this_arm; } return 0; 
} static int do_test(struct io_uring *ring) { int i; if (arm_polls(ring)) return 1; for (i = 0; i < NLOOPS; i++) { trigger_polls(); if (reap_polls(ring)) return 1; } return 0; } int main(int argc, char *argv[]) { struct io_uring ring; struct io_uring_params params = { }; struct rlimit rlim; int i, ret; if (argc > 1) return 0; if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { perror("getrlimit"); return T_EXIT_FAIL; } if (rlim.rlim_cur < (2 * NFILES + 5)) { rlim.rlim_cur = (2 * NFILES + 5); rlim.rlim_max = rlim.rlim_cur; if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { if (errno == EPERM) goto err_nofail; perror("setrlimit"); return T_EXIT_FAIL; } } for (i = 0; i < NFILES; i++) { if (pipe(p[i].fd) < 0) { perror("pipe"); return T_EXIT_FAIL; } } params.flags = IORING_SETUP_CQSIZE; params.cq_entries = 4096; ret = io_uring_queue_init_params(RING_SIZE, &ring, ¶ms); if (ret) { if (ret == -EINVAL) { fprintf(stdout, "No CQSIZE, trying without\n"); params.flags &= ~IORING_SETUP_CQSIZE; params.cq_entries = 0; ret = io_uring_queue_init_params(RING_SIZE, &ring, ¶ms); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } } } if (do_test(&ring)) { fprintf(stderr, "test (normal) failed\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); if (t_probe_defer_taskrun()) { params.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN; ret = io_uring_queue_init_params(RING_SIZE, &ring, ¶ms); if (ret) { fprintf(stderr, "ring DEFER setup failed: %d\n", ret); return T_EXIT_FAIL; } if (do_test(&ring)) { fprintf(stderr, "test (DEFER) failed\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); } return 0; err_nofail: fprintf(stderr, "poll-many: not enough files available (and not root), " "skipped\n"); return 0; } liburing-2.6/test/poll-mshot-overflow.c000066400000000000000000000126321461424365000202670ustar00rootroot00000000000000// SPDX-License-Identifier: MIT #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int check_final_cqe(struct io_uring *ring) { struct io_uring_cqe *cqe; int count = 0; bool signalled_no_more = false; while (!io_uring_peek_cqe(ring, &cqe)) { if (cqe->user_data == 1) { count++; if (signalled_no_more) { fprintf(stderr, "signalled no more!\n"); return T_EXIT_FAIL; } if (!(cqe->flags & IORING_CQE_F_MORE)) signalled_no_more = true; } else if (cqe->user_data != 3) { fprintf(stderr, "%d: got unexpected %d\n", count, (int)cqe->user_data); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); } if (!count) { fprintf(stderr, "no cqe\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } static int test(bool defer_taskrun) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring ring; int pipe1[2]; int ret, i; if (pipe(pipe1) != 0) { perror("pipe"); return T_EXIT_FAIL; } struct io_uring_params params = { /* cheat using SINGLE_ISSUER existence to know if this behaviour * is updated */ .flags = IORING_SETUP_CQSIZE | IORING_SETUP_SINGLE_ISSUER, .cq_entries = 2 }; if (defer_taskrun) params.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN; ret = io_uring_queue_init_params(2, &ring, ¶ms); if (ret) return T_EXIT_SKIP; sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return T_EXIT_FAIL; } io_uring_prep_poll_multishot(sqe, pipe1[0], POLLIN); io_uring_sqe_set_data64(sqe, 1); if (io_uring_cq_ready(&ring)) { fprintf(stderr, "unexpected cqe\n"); return T_EXIT_FAIL; } for (i = 0; i < 2; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); 
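/*
 * Each nop consumes a CQ slot on completion; with cq_entries == 2 the
 * two nops fill the ring, so the poll completion triggered by the
 * write below has to go through the overflow path.
 */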
io_uring_sqe_set_data64(sqe, 2); io_uring_submit(&ring); } do { errno = 0; ret = write(pipe1[1], "foo", 3); } while (ret == -1 && errno == EINTR); if (ret <= 0) { fprintf(stderr, "write failed: %d\n", errno); return T_EXIT_FAIL; } /* should have 2 cqe + 1 overflow now, so take out two cqes */ for (i = 0; i < 2; i++) { if (io_uring_peek_cqe(&ring, &cqe)) { fprintf(stderr, "unexpectedly no cqe\n"); return T_EXIT_FAIL; } if (cqe->user_data != 2) { fprintf(stderr, "unexpected user_data\n"); return T_EXIT_FAIL; } io_uring_cqe_seen(&ring, cqe); } /* make sure everything is processed */ io_uring_get_events(&ring); /* now remove the poll */ sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_remove(sqe, 1); io_uring_sqe_set_data64(sqe, 3); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "bad poll remove\n"); return T_EXIT_FAIL; } ret = check_final_cqe(&ring); close(pipe1[0]); close(pipe1[1]); io_uring_queue_exit(&ring); return ret; } static int test_downgrade(bool support_defer) { struct io_uring_cqe cqes[128]; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring ring; int fds[2]; int ret, i, cqe_count, tmp = 0, more_cqe_count; if (pipe(fds) != 0) { perror("pipe"); return -1; } struct io_uring_params params = { .flags = IORING_SETUP_CQSIZE, .cq_entries = 2 }; ret = io_uring_queue_init_params(2, &ring, ¶ms); if (ret) { fprintf(stderr, "queue init: %d\n", ret); return -1; } sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return -1; } io_uring_prep_poll_multishot(sqe, fds[0], POLLIN); io_uring_sqe_set_data64(sqe, 1); io_uring_submit(&ring); for (i = 0; i < 8; i++) { ret = write(fds[1], &tmp, sizeof(tmp)); if (ret != sizeof(tmp)) { perror("write"); return -1; } ret = read(fds[0], &tmp, sizeof(tmp)); if (ret != sizeof(tmp)) { perror("read"); return -1; } } cqe_count = 0; while (!io_uring_peek_cqe(&ring, &cqe)) { cqes[cqe_count++] = *cqe; io_uring_cqe_seen(&ring, cqe); } /* Some kernels might allow overflows to poll, * but if they didn't it should stop the MORE flag */ if (cqe_count < 3) { fprintf(stderr, "too few cqes: %d\n", cqe_count); return -1; } else if (cqe_count == 8) { more_cqe_count = cqe_count; /* downgrade only available since support_defer */ if (support_defer) { fprintf(stderr, "did not downgrade on overflow\n"); return -1; } } else { more_cqe_count = cqe_count - 1; cqe = &cqes[cqe_count - 1]; if (cqe->flags & IORING_CQE_F_MORE) { fprintf(stderr, "incorrect MORE flag %x\n", cqe->flags); return -1; } } for (i = 0; i < more_cqe_count; i++) { cqe = &cqes[i]; if (!(cqe->flags & IORING_CQE_F_MORE)) { fprintf(stderr, "missing MORE flag\n"); return -1; } if (cqe->res < 0) { fprintf(stderr, "bad res: %d\n", cqe->res); return -1; } } close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { int ret; bool support_defer; if (argc > 1) return T_EXIT_SKIP; support_defer = t_probe_defer_taskrun(); ret = test_downgrade(support_defer); if (ret) { fprintf(stderr, "%s: test_downgrade(%d) failed\n", argv[0], support_defer); return T_EXIT_FAIL; } ret = test(false); if (ret == T_EXIT_SKIP) return ret; if (ret != T_EXIT_PASS) { fprintf(stderr, "%s: test(false) failed\n", argv[0]); return ret; } if (support_defer) { ret = test(true); if (ret != T_EXIT_PASS) { fprintf(stderr, "%s: test(true) failed\n", argv[0]); return ret; } } return ret; } liburing-2.6/test/poll-mshot-update.c000066400000000000000000000131671461424365000177120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: 
test many files being polled for and updated * */ #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #define NFILES 5000 #define BATCH 500 #define NLOOPS 1000 #define RING_SIZE 512 struct p { int fd[2]; int triggered; }; static struct p p[NFILES]; static int has_poll_update(void) { struct io_uring ring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; bool has_update = false; int ret; ret = io_uring_queue_init(8, &ring, 0); if (ret) return -1; sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_update(sqe, 0, 0, POLLIN, IORING_TIMEOUT_UPDATE); ret = io_uring_submit(&ring); if (ret != 1) return -1; ret = io_uring_wait_cqe(&ring, &cqe); if (!ret) { if (cqe->res == -ENOENT) has_update = true; else if (cqe->res != -EINVAL) return -1; io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); return has_update; } static int arm_poll(struct io_uring *ring, int off) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "failed getting sqe\n"); return 1; } io_uring_prep_poll_multishot(sqe, p[off].fd[0], POLLIN); sqe->user_data = off; return 0; } static int submit_arm_poll(struct io_uring *ring, int off) { int ret; ret = arm_poll(ring, off); if (ret) return ret; ret = io_uring_submit(ring); if (ret < 0) return ret; return ret == 1 ? 0 : -1; } static int reap_polls(struct io_uring *ring) { struct io_uring_cqe *cqe; int i, ret, off; char c; for (i = 0; i < BATCH; i++) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); /* update event */ io_uring_prep_poll_update(sqe, i, 0, POLLIN, IORING_POLL_UPDATE_EVENTS); sqe->user_data = 0x12345678; } ret = io_uring_submit(ring); if (ret != BATCH) { fprintf(stderr, "submitted %d, %d\n", ret, BATCH); return 1; } for (i = 0; i < 2 * BATCH; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait cqe %d\n", ret); return ret; } off = cqe->user_data; if (off == 0x12345678) goto seen; if (!(cqe->flags & IORING_CQE_F_MORE)) { /* need to re-arm poll */ ret = submit_arm_poll(ring, off); if (ret) break; if (cqe->res <= 0) { /* retry this one */ i--; goto seen; } } ret = read(p[off].fd[0], &c, 1); if (ret != 1) { if (ret == -1 && errno == EAGAIN) goto seen; fprintf(stderr, "read got %d/%d\n", ret, errno); break; } seen: io_uring_cqe_seen(ring, cqe); } if (i != 2 * BATCH) { fprintf(stderr, "gave up at %d\n", i); return 1; } return 0; } static int trigger_polls(void) { char c = 89; int i, ret; for (i = 0; i < BATCH; i++) { int off; do { off = rand() % NFILES; if (!p[off].triggered) break; } while (1); p[off].triggered = 1; ret = write(p[off].fd[1], &c, 1); if (ret != 1) { fprintf(stderr, "write got %d/%d\n", ret, errno); return 1; } } return 0; } static void *trigger_polls_fn(void *data) { trigger_polls(); return NULL; } static int arm_polls(struct io_uring *ring) { int ret, to_arm = NFILES, i, off; off = 0; while (to_arm) { int this_arm; this_arm = to_arm; if (this_arm > RING_SIZE) this_arm = RING_SIZE; for (i = 0; i < this_arm; i++) { if (arm_poll(ring, off)) { fprintf(stderr, "arm failed at %d\n", off); return 1; } off++; } ret = io_uring_submit(ring); if (ret != this_arm) { fprintf(stderr, "submitted %d, %d\n", ret, this_arm); return 1; } to_arm -= this_arm; } return 0; } static int run(int cqe) { struct io_uring ring; struct io_uring_params params = { }; pthread_t thread; int i, j, ret; for (i = 0; i < NFILES; i++) { if (pipe(p[i].fd) < 0) { perror("pipe"); return 1; } fcntl(p[i].fd[0], F_SETFL, O_NONBLOCK); } params.flags = IORING_SETUP_CQSIZE; 
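/*
 * CQ ring size comes from the caller; main() invokes run() with both
 * 1024 and 8192 entries so the test covers small and large CQ rings.
 */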
params.cq_entries = cqe; ret = io_uring_queue_init_params(RING_SIZE, &ring, ¶ms); if (ret) { if (ret == -EINVAL) { fprintf(stdout, "No CQSIZE, trying without\n"); ret = io_uring_queue_init(RING_SIZE, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } } } if (arm_polls(&ring)) goto err; for (i = 0; i < NLOOPS; i++) { pthread_create(&thread, NULL, trigger_polls_fn, NULL); ret = reap_polls(&ring); if (ret) goto err; pthread_join(thread, NULL); for (j = 0; j < NFILES; j++) p[j].triggered = 0; } io_uring_queue_exit(&ring); for (i = 0; i < NFILES; i++) { close(p[i].fd[0]); close(p[i].fd[1]); } return 0; err: io_uring_queue_exit(&ring); return 1; } int main(int argc, char *argv[]) { struct rlimit rlim; int ret; if (argc > 1) return 0; ret = has_poll_update(); if (ret < 0) { fprintf(stderr, "poll update check failed %i\n", ret); return -1; } else if (!ret) { fprintf(stderr, "no poll update, skip\n"); return 0; } if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { perror("getrlimit"); goto err; } if (rlim.rlim_cur < (2 * NFILES + 5)) { rlim.rlim_cur = (2 * NFILES + 5); rlim.rlim_max = rlim.rlim_cur; if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { if (errno == EPERM) goto err_nofail; perror("setrlimit"); goto err; } } ret = run(1024); if (ret) { fprintf(stderr, "run(1024) failed\n"); goto err; } ret = run(8192); if (ret) { fprintf(stderr, "run(8192) failed\n"); goto err; } return 0; err: fprintf(stderr, "poll-many failed\n"); return 1; err_nofail: fprintf(stderr, "poll-many: not enough files available (and not root), " "skipped\n"); return 0; } liburing-2.6/test/poll-race-mshot.c000066400000000000000000000131021461424365000173270ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: check that racing wakeups don't re-issue a poll multishot, * this can leak ring provided buffers. also test if ring * provided buffers for regular receive can leak if we hit a * poll race. 
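 *
 * The failure mode guarded against: a wakeup racing with completion
 * processing re-issues the multishot poll, which can consume a ring
 * provided buffer without a CQE ever being posted for it.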
*/ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define NREQS 64 #define BUF_SIZE 64 static int no_buf_ring; struct data { pthread_barrier_t barrier; int fd; }; static void *thread(void *data) { struct data *d = data; char buf[BUF_SIZE]; int ret, i, fd; memset(buf, 0x5a, BUF_SIZE); pthread_barrier_wait(&d->barrier); fd = d->fd; for (i = 0; i < NREQS; i++) { ret = write(fd, buf, sizeof(buf)); if (ret != BUF_SIZE) { if (ret < 0) { perror("write"); printf("bad fd %d\n", fd); } else fprintf(stderr, "wrote short %d\n", ret); } } return NULL; } static int test(struct io_uring *ring, struct data *d) { struct io_uring_buf_ring *br; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int fd[2], ret, i; pthread_t t; void *buf, *ptr; void *ret2; if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fd) < 0) { perror("socketpair"); return T_EXIT_FAIL; } d->fd = fd[1]; if (posix_memalign((void **) &buf, 16384, BUF_SIZE * NREQS)) return T_EXIT_FAIL; br = io_uring_setup_buf_ring(ring, NREQS, 1, 0, &ret); if (!br) { if (ret == -EINVAL) { no_buf_ring = 1; return T_EXIT_SKIP; } fprintf(stderr, "buf ring reg %d\n", ret); return T_EXIT_FAIL; } ptr = buf; for (i = 0; i < NREQS; i++) { io_uring_buf_ring_add(br, ptr, BUF_SIZE, i + 1, io_uring_buf_ring_mask(NREQS), i); ptr += BUF_SIZE; } io_uring_buf_ring_advance(br, NREQS); pthread_create(&t, NULL, thread, d); for (i = 0; i < NREQS; i++) { sqe = io_uring_get_sqe(ring); io_uring_prep_recv(sqe, fd[0], NULL, 0, 0); sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = 1; } pthread_barrier_wait(&d->barrier); ret = io_uring_submit(ring); if (ret != NREQS) { fprintf(stderr, "submit %d\n", ret); return T_EXIT_FAIL; } i = 0; do { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "cqe wait %d\n", ret); return T_EXIT_FAIL; } i++; if (cqe->res != BUF_SIZE) { fprintf(stderr, "Bad cqe res %d\n", cqe->res); break; } if (cqe->flags & IORING_CQE_F_BUFFER) { int bid = cqe->flags >> 16; if (bid > NREQS) { fprintf(stderr, "Bad BID %d\n", bid); return T_EXIT_FAIL; } } else { fprintf(stderr, "No BID set!\n"); printf("ret=%d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); if (i > NREQS) { fprintf(stderr, "Got too many requests?\n"); return T_EXIT_FAIL; } } while (i < NREQS); pthread_join(t, &ret2); free(buf); io_uring_free_buf_ring(ring, br, NREQS, 1); close(fd[0]); close(fd[1]); return T_EXIT_PASS; } static int test_mshot(struct io_uring *ring, struct data *d) { struct io_uring_buf_ring *br; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int fd[2], ret, i; pthread_t t; void *buf, *ptr; void *ret2; if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fd) < 0) { perror("socketpair"); return T_EXIT_FAIL; } d->fd = fd[1]; if (posix_memalign((void *) &buf, 16384, BUF_SIZE * NREQS)) return T_EXIT_FAIL; br = io_uring_setup_buf_ring(ring, NREQS, 1, 0, &ret); if (!br) { fprintf(stderr, "buf ring reg %d\n", ret); return T_EXIT_FAIL; } ptr = buf; for (i = 0; i < NREQS; i++) { io_uring_buf_ring_add(br, ptr, BUF_SIZE, i + 1, io_uring_buf_ring_mask(NREQS), i); ptr += BUF_SIZE; } io_uring_buf_ring_advance(br, NREQS); pthread_create(&t, NULL, thread, d); sqe = io_uring_get_sqe(ring); io_uring_prep_recv_multishot(sqe, fd[0], NULL, 0, 0); sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = 1; pthread_barrier_wait(&d->barrier); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "submit %d\n", ret); return T_EXIT_FAIL; } i = 0; do { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "cqe wait %d\n", ret); return 
T_EXIT_FAIL; } i++; if (!(cqe->flags & IORING_CQE_F_MORE)) break; if (cqe->res != BUF_SIZE) { fprintf(stderr, "Bad cqe res %d\n", cqe->res); break; } if (cqe->flags & IORING_CQE_F_BUFFER) { int bid = cqe->flags >> 16; if (bid > NREQS) { fprintf(stderr, "Bad BID %d\n", bid); return T_EXIT_FAIL; } } else { fprintf(stderr, "No BID set!\n"); printf("ret=%d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); if (i > NREQS) { fprintf(stderr, "Got too many requests?\n"); return T_EXIT_FAIL; } } while (1); if (i != NREQS + 1) { fprintf(stderr, "Only got %d requests\n", i); return T_EXIT_FAIL; } pthread_join(t, &ret2); io_uring_free_buf_ring(ring, br, NREQS, 1); free(buf); close(fd[0]); close(fd[1]); return T_EXIT_PASS; } int main(int argc, char *argv[]) { struct io_uring ring; struct data d; int i, ret; if (argc > 1) return T_EXIT_SKIP; pthread_barrier_init(&d.barrier, NULL, 2); for (i = 0; i < 1000; i++) { io_uring_queue_init(NREQS, &ring, 0); ret = test(&ring, &d); if (ret != T_EXIT_PASS) { if (no_buf_ring) break; fprintf(stderr, "Test failed loop %d\n", i); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); } if (no_buf_ring) return T_EXIT_SKIP; for (i = 0; i < 1000; i++) { io_uring_queue_init(NREQS, &ring, 0); ret = test_mshot(&ring, &d); if (ret != T_EXIT_PASS) { fprintf(stderr, "Test mshot failed loop %d\n", i); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); } return T_EXIT_PASS; } liburing-2.6/test/poll-race.c000066400000000000000000000036211461424365000162040ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: check that multiple receives on the same socket don't get * stalled if multiple wakers race with the socket readiness. */ #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define NREQS 64 struct data { pthread_barrier_t barrier; int fd; }; static void *thread(void *data) { struct data *d = data; char buf[64]; int ret, i; pthread_barrier_wait(&d->barrier); for (i = 0; i < NREQS; i++) { ret = write(d->fd, buf, sizeof(buf)); if (ret != 64) fprintf(stderr, "wrote short %d\n", ret); } return NULL; } static int test(struct io_uring *ring, struct data *d) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int fd[2], ret, i; char buf[64]; pthread_t t; void *ret2; if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fd) < 0) { perror("socketpair"); return T_EXIT_FAIL; } d->fd = fd[1]; pthread_create(&t, NULL, thread, d); for (i = 0; i < NREQS; i++) { sqe = io_uring_get_sqe(ring); io_uring_prep_recv(sqe, fd[0], buf, sizeof(buf), 0); } pthread_barrier_wait(&d->barrier); ret = io_uring_submit(ring); if (ret != NREQS) { fprintf(stderr, "submit %d\n", ret); return T_EXIT_FAIL; } for (i = 0; i < NREQS; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "cqe wait %d\n", ret); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); } close(fd[0]); close(fd[1]); pthread_join(t, &ret2); return T_EXIT_PASS; } int main(int argc, char *argv[]) { struct io_uring ring; struct data d; int i, ret; if (argc > 1) return T_EXIT_SKIP; pthread_barrier_init(&d.barrier, NULL, 2); io_uring_queue_init(NREQS, &ring, 0); for (i = 0; i < 1000; i++) { ret = test(&ring, &d); if (ret != T_EXIT_PASS) { fprintf(stderr, "Test failed\n"); return T_EXIT_FAIL; } } return T_EXIT_PASS; } liburing-2.6/test/poll-ring.c000066400000000000000000000016341461424365000162330ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Test poll against ring itself. 
A buggy kernel will end up * having io_wq_* workers pending, as the circular reference * will prevent full exit. * */ #include #include #include #include #include #include #include "liburing.h" int main(int argc, char *argv[]) { struct io_uring_sqe *sqe; struct io_uring ring; int ret; if (argc > 1) return 0; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "child: ring setup failed: %d\n", ret); return 1; } sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return 1; } io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN); io_uring_sqe_set_data(sqe, sqe); ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "child: sqe submit failed: %d\n", ret); return 1; } return 0; } liburing-2.6/test/poll-v-poll.c000066400000000000000000000143631461424365000165100ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring poll handling * */ #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" struct thread_data { struct io_uring *ring; int fd; int events; const char *test; int out[2]; }; static void *epoll_wait_fn(void *data) { struct thread_data *td = data; struct epoll_event ev; if (epoll_wait(td->fd, &ev, 1, -1) < 0) { perror("epoll_wait"); goto err; } return NULL; err: return (void *) 1; } static void *iou_poll(void *data) { struct thread_data *td = data; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(td->ring); io_uring_prep_poll_add(sqe, td->fd, td->events); ret = io_uring_submit(td->ring); if (ret != 1) { fprintf(stderr, "submit got %d\n", ret); goto err; } ret = io_uring_wait_cqe(td->ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe: %d\n", ret); goto err; } td->out[0] = cqe->res & 0x3f; io_uring_cqe_seen(td->ring, cqe); return NULL; err: return (void *) 1; } static void *poll_pipe(void *data) { struct thread_data *td = data; struct pollfd pfd; int ret; pfd.fd = td->fd; pfd.events = td->events; ret = poll(&pfd, 1, -1); if (ret < 0) perror("poll"); td->out[1] = pfd.revents; return NULL; } static int do_pipe_pollin_test(struct io_uring *ring) { struct thread_data td; pthread_t threads[2]; int ret, pipe1[2]; char buf; if (pipe(pipe1) < 0) { perror("pipe"); return 1; } td.ring = ring; td.fd = pipe1[0]; td.events = POLLIN; td.test = __FUNCTION__; pthread_create(&threads[1], NULL, iou_poll, &td); pthread_create(&threads[0], NULL, poll_pipe, &td); usleep(100000); buf = 0x89; ret = write(pipe1[1], &buf, sizeof(buf)); if (ret != sizeof(buf)) { fprintf(stderr, "write failed: %d\n", ret); return 1; } pthread_join(threads[0], NULL); pthread_join(threads[1], NULL); if (td.out[0] != td.out[1]) { fprintf(stderr, "%s: res %x/%x differ\n", __FUNCTION__, td.out[0], td.out[1]); return 1; } return 0; } static int do_pipe_pollout_test(struct io_uring *ring) { struct thread_data td; pthread_t threads[2]; int ret, pipe1[2]; char buf; if (pipe(pipe1) < 0) { perror("pipe"); return 1; } td.ring = ring; td.fd = pipe1[1]; td.events = POLLOUT; td.test = __FUNCTION__; pthread_create(&threads[0], NULL, poll_pipe, &td); pthread_create(&threads[1], NULL, iou_poll, &td); usleep(100000); buf = 0x89; ret = write(pipe1[1], &buf, sizeof(buf)); if (ret != sizeof(buf)) { fprintf(stderr, "write failed: %d\n", ret); return 1; } pthread_join(threads[0], NULL); pthread_join(threads[1], NULL); if (td.out[0] != td.out[1]) { fprintf(stderr, "%s: res %x/%x differ\n", __FUNCTION__, td.out[0], td.out[1]); return 1; } return 0; } static int 
do_fd_test(struct io_uring *ring, const char *fname, int events) { struct thread_data td; pthread_t threads[2]; int fd; fd = open(fname, O_RDONLY); if (fd < 0) { perror("open"); return 1; } td.ring = ring; td.fd = fd; td.events = events; td.test = __FUNCTION__; pthread_create(&threads[0], NULL, poll_pipe, &td); pthread_create(&threads[1], NULL, iou_poll, &td); pthread_join(threads[0], NULL); pthread_join(threads[1], NULL); if (td.out[0] != td.out[1]) { fprintf(stderr, "%s: res %x/%x differ\n", __FUNCTION__, td.out[0], td.out[1]); return 1; } return 0; } static int iou_epoll_ctl(struct io_uring *ring, int epfd, int fd, struct epoll_event *ev) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "Failed to get sqe\n"); return 1; } io_uring_prep_epoll_ctl(sqe, epfd, fd, EPOLL_CTL_ADD, ev); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return 1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe: %d\n", ret); return 1; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; } static int do_test_epoll(struct io_uring *ring, int iou_epoll_add) { struct epoll_event ev; struct thread_data td; pthread_t threads[2]; int ret, pipe1[2]; char buf; int fd; fd = epoll_create1(0); if (fd < 0) { perror("epoll_create"); return 1; } if (pipe(pipe1) < 0) { perror("pipe"); return 1; } ev.events = EPOLLIN; ev.data.fd = pipe1[0]; if (!iou_epoll_add) { if (epoll_ctl(fd, EPOLL_CTL_ADD, pipe1[0], &ev) < 0) { perror("epoll_ctrl"); return 1; } } else { ret = iou_epoll_ctl(ring, fd, pipe1[0], &ev); if (ret == -EINVAL) { fprintf(stdout, "epoll not supported, skipping\n"); return 0; } else if (ret < 0) { return 1; } } td.ring = ring; td.fd = fd; td.events = POLLIN; td.test = __FUNCTION__; pthread_create(&threads[0], NULL, iou_poll, &td); pthread_create(&threads[1], NULL, epoll_wait_fn, &td); usleep(100000); buf = 0x89; ret = write(pipe1[1], &buf, sizeof(buf)); if (ret != sizeof(buf)) { fprintf(stderr, "write failed: %d\n", ret); return 1; } pthread_join(threads[0], NULL); pthread_join(threads[1], NULL); return 0; } int main(int argc, char *argv[]) { struct io_uring ring; const char *fname; int ret; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } ret = do_pipe_pollin_test(&ring); if (ret) { fprintf(stderr, "pipe pollin test failed\n"); return ret; } ret = do_pipe_pollout_test(&ring); if (ret) { fprintf(stderr, "pipe pollout test failed\n"); return ret; } ret = do_test_epoll(&ring, 0); if (ret) { fprintf(stderr, "epoll test 0 failed\n"); return ret; } ret = do_test_epoll(&ring, 1); if (ret) { fprintf(stderr, "epoll test 1 failed\n"); return ret; } if (argc > 1) fname = argv[1]; else fname = argv[0]; ret = do_fd_test(&ring, fname, POLLIN); if (ret) { fprintf(stderr, "fd test IN failed\n"); return ret; } ret = do_fd_test(&ring, fname, POLLOUT); if (ret) { fprintf(stderr, "fd test OUT failed\n"); return ret; } ret = do_fd_test(&ring, fname, POLLOUT | POLLIN); if (ret) { fprintf(stderr, "fd test IN|OUT failed\n"); return ret; } return 0; } liburing-2.6/test/poll.c000066400000000000000000000152721461424365000153010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring poll handling * */ #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static void do_setsockopt(int fd, int level, int optname, int val) { if (setsockopt(fd, level, 
optname, &val, sizeof(val))) t_error(1, errno, "setsockopt %d.%d: %d", level, optname, val); } static bool check_cq_empty(struct io_uring *ring) { struct io_uring_cqe *cqe = NULL; int ret; ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */ return ret == -EAGAIN; } static int test_basic(void) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring ring; int pipe1[2]; pid_t p; int ret; if (pipe(pipe1) != 0) { perror("pipe"); return 1; } p = fork(); if (p == -1) { perror("fork"); exit(2); } else if (p == 0) { ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "child: ring setup failed: %d\n", ret); return 1; } sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return 1; } io_uring_prep_poll_add(sqe, pipe1[0], POLLIN); io_uring_sqe_set_data(sqe, sqe); ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "child: sqe submit failed: %d\n", ret); return 1; } do { ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "child: wait completion %d\n", ret); break; } io_uring_cqe_seen(&ring, cqe); } while (ret != 0); if (ret < 0) return 1; if (cqe->user_data != (unsigned long) sqe) { fprintf(stderr, "child: cqe doesn't match sqe\n"); return 1; } if ((cqe->res & POLLIN) != POLLIN) { fprintf(stderr, "child: bad return value %ld\n", (long) cqe->res); return 1; } io_uring_queue_exit(&ring); exit(0); } do { errno = 0; ret = write(pipe1[1], "foo", 3); } while (ret == -1 && errno == EINTR); if (ret != 3) { fprintf(stderr, "parent: bad write return %d\n", ret); return 1; } close(pipe1[0]); close(pipe1[1]); return 0; } static int test_missing_events(void) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring ring; int i, ret, sp[2]; char buf[2] = {}; int res_mask = 0; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0) { perror("Failed to create Unix-domain socket pair\n"); return 1; } do_setsockopt(sp[0], SOL_SOCKET, SO_SNDBUF, 1); ret = send(sp[0], buf, sizeof(buf), 0); if (ret != sizeof(buf)) { perror("send failed\n"); return 1; } sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_multishot(sqe, sp[0], POLLIN|POLLOUT); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "sqe submit failed: %d\n", ret); return 1; } /* trigger EPOLLIN */ ret = send(sp[1], buf, sizeof(buf), 0); if (ret != sizeof(buf)) { fprintf(stderr, "send sp[1] failed %i %i\n", ret, errno); return 1; } /* trigger EPOLLOUT */ ret = recv(sp[1], buf, sizeof(buf), 0); if (ret != sizeof(buf)) { perror("recv failed\n"); return 1; } for (i = 0; ; i++) { if (i == 0) ret = io_uring_wait_cqe(&ring, &cqe); else ret = io_uring_peek_cqe(&ring, &cqe); if (i != 0 && ret == -EAGAIN) { break; } if (ret) { fprintf(stderr, "wait completion %d, %i\n", ret, i); return 1; } res_mask |= cqe->res; io_uring_cqe_seen(&ring, cqe); } if ((res_mask & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) { fprintf(stderr, "missing poll events %i\n", res_mask); return 1; } io_uring_queue_exit(&ring); close(sp[0]); close(sp[1]); return 0; } #define NR_SQES 2048 static int test_self_poll(void) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring ring; int ret, i, j; ret = io_uring_queue_init(NR_SQES, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } for (j = 0; j < 32; j++) { for (i = 0; i < NR_SQES; i++) { sqe = io_uring_get_sqe(&ring); 
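/*
 * These poll requests target the ring's own fd and are never expected
 * to complete; the test just queues a large pile of them (32 x
 * NR_SQES) and then uses a NOP below to verify the ring still works.
 */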
io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN); } ret = io_uring_submit(&ring); assert(ret == NR_SQES); } sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); ret = io_uring_submit(&ring); assert(ret == 1); ret = io_uring_wait_cqe(&ring, &cqe); io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return T_EXIT_PASS; } static int test_disabled_ring_lazy_polling(int early_poll) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct io_uring ring, ring2; unsigned head; int ret, i = 0; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_R_DISABLED); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } ret = io_uring_queue_init(8, &ring2, 0); if (ret) { fprintf(stderr, "ring2 setup failed: %d\n", ret); return 1; } if (early_poll) { /* start polling disabled DEFER_TASKRUN ring */ sqe = io_uring_get_sqe(&ring2); io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN); ret = io_uring_submit(&ring2); assert(ret == 1); assert(check_cq_empty(&ring2)); } /* enable rings, which should also activate pollwq */ ret = io_uring_enable_rings(&ring); assert(ret >= 0); if (!early_poll) { /* start polling enabled DEFER_TASKRUN ring */ sqe = io_uring_get_sqe(&ring2); io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN); ret = io_uring_submit(&ring2); assert(ret == 1); assert(check_cq_empty(&ring2)); } sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); ret = io_uring_submit(&ring); assert(ret == 1); io_uring_for_each_cqe(&ring2, head, cqe) { i++; } if (i != 1) { fprintf(stderr, "fail, polling stuck\n"); return 1; } io_uring_queue_exit(&ring); io_uring_queue_exit(&ring2); return 0; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test_basic(); if (ret) { fprintf(stderr, "test_basic() failed %i\n", ret); return T_EXIT_FAIL; } if (t_probe_defer_taskrun()) { ret = test_missing_events(); if (ret) { fprintf(stderr, "test_missing_events() failed %i\n", ret); return T_EXIT_FAIL; } ret = test_disabled_ring_lazy_polling(false); if (ret) { fprintf(stderr, "test_disabled_ring_lazy_polling(false) failed %i\n", ret); return T_EXIT_FAIL; } ret = test_disabled_ring_lazy_polling(true); if (ret) { fprintf(stderr, "test_disabled_ring_lazy_polling(true) failed %i\n", ret); return T_EXIT_FAIL; } } ret = test_self_poll(); if (ret) { fprintf(stderr, "test_self_poll failed\n"); return T_EXIT_FAIL; } return 0; } liburing-2.6/test/probe.c000066400000000000000000000047131461424365000154400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test IORING_REGISTER_PROBE */ #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static int no_probe; static int verify_probe(struct io_uring_probe *p, int full) { if (!full && p->ops_len) { fprintf(stderr, "Got ops_len=%u\n", p->ops_len); return 1; } if (!p->last_op) { fprintf(stderr, "Got last_op=%u\n", p->last_op); return 1; } if (!full) return 0; /* check a few ops that must be supported */ if (!(p->ops[IORING_OP_NOP].flags & IO_URING_OP_SUPPORTED)) { fprintf(stderr, "NOP not supported!?\n"); return 1; } if (!(p->ops[IORING_OP_READV].flags & IO_URING_OP_SUPPORTED)) { fprintf(stderr, "READV not supported!?\n"); return 1; } if (!(p->ops[IORING_OP_WRITE].flags & IO_URING_OP_SUPPORTED)) { fprintf(stderr, "WRITE not supported!?\n"); return 1; } return 0; } static int test_probe_helper(struct io_uring *ring) { int ret; struct io_uring_probe *p; p = io_uring_get_probe_ring(ring); if (!p) { fprintf(stderr, "Failed getting 
probe data\n"); return 1; } ret = verify_probe(p, 1); io_uring_free_probe(p); return ret; } static int test_probe(struct io_uring *ring) { struct io_uring_probe *p; size_t len; int ret; len = sizeof(*p) + 256 * sizeof(struct io_uring_probe_op); p = t_calloc(1, len); ret = io_uring_register_probe(ring, p, 0); if (ret == -EINVAL) { fprintf(stdout, "Probe not supported, skipping\n"); no_probe = 1; goto out; } else if (ret) { fprintf(stdout, "Probe returned %d\n", ret); goto err; } if (verify_probe(p, 0)) goto err; /* now grab for all entries */ memset(p, 0, len); ret = io_uring_register_probe(ring, p, 256); if (ret == -EINVAL) { fprintf(stdout, "Probe not supported, skipping\n"); goto err; } else if (ret) { fprintf(stdout, "Probe returned %d\n", ret); goto err; } if (verify_probe(p, 1)) goto err; out: free(p); return 0; err: free(p); return 1; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return 0; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } ret = test_probe(&ring); if (ret) { fprintf(stderr, "test_probe failed\n"); return ret; } if (no_probe) return 0; ret = test_probe_helper(&ring); if (ret) { fprintf(stderr, "test_probe failed\n"); return ret; } return 0; } liburing-2.6/test/read-before-exit.c000066400000000000000000000047601461424365000174550ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test if issuing IO from thread and immediately exiting will * proceed correctly. * * Original test case from: https://github.com/axboe/liburing/issues/582 */ #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_iopoll; struct data { struct io_uring *ring; int timer_fd1; int timer_fd2; uint64_t buf1; uint64_t buf2; }; static void *submit(void *data) { struct io_uring_sqe *sqe; struct data *d = data; int ret; sqe = io_uring_get_sqe(d->ring); io_uring_prep_read(sqe, d->timer_fd1, &d->buf1, sizeof(d->buf1), 0); sqe = io_uring_get_sqe(d->ring); io_uring_prep_read(sqe, d->timer_fd2, &d->buf2, sizeof(d->buf2), 0); ret = io_uring_submit(d->ring); if (ret != 2) { struct io_uring_cqe *cqe; /* * Kernels without submit-all-on-error behavior will * fail submitting all, check if that's the case and * don't error */ ret = io_uring_peek_cqe(d->ring, &cqe); if (!ret && cqe->res == -EOPNOTSUPP) { no_iopoll = 1; return NULL; } return (void *) (uintptr_t) 1; } /* Exit suddenly. */ return NULL; } static int test(int flags) { struct io_uring_params params = { .flags = flags, }; struct io_uring ring; struct data d = { .ring = &ring, }; pthread_t thread; void *res; int ret; ret = t_create_ring_params(8, &ring, ¶ms); if (ret == T_SETUP_SKIP) return 0; else if (ret != T_SETUP_OK) return 1; d.timer_fd1 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC); if (d.timer_fd1 < 0) { perror("timerfd_create"); return 1; } d.timer_fd2 = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC); if (d.timer_fd2 < 0) { perror("timerfd_create"); return 1; } pthread_create(&thread, NULL, submit, &d); pthread_join(thread, &res); /** Wait for completions and do stuff ... 
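 * (the submitting thread has already exited at this point; the
 * queue_exit below has to cope with whatever reads it left in flight)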
**/ io_uring_queue_exit(&ring); close(d.timer_fd1); close(d.timer_fd2); return !!res; } int main(int argc, char *argv[]) { int ret, i; for (i = 0; i < 1000; i++) { ret = test(0); if (ret) { fprintf(stderr, "Test failed\n"); return ret; } } for (i = 0; i < 1000; i++) { ret = test(IORING_SETUP_IOPOLL); if (ret) { fprintf(stderr, "Test IOPOLL failed loop %d\n", ret); return ret; } if (no_iopoll) break; } for (i = 0; i < 100; i++) { ret = test(IORING_SETUP_SQPOLL); if (ret) { fprintf(stderr, "Test SQPOLL failed\n"); return ret; } } return 0; } liburing-2.6/test/read-mshot-empty.c000066400000000000000000000064741461424365000175360ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test that multishot read correctly keeps reading until all * data has been emptied. the original implementation failed * to do so, if the available buffer size was less than what * was available, hence requiring multiple reads to empty the * file buffer. */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define BGID 17 #define NR_BUFS 4 #define BR_MASK (NR_BUFS - 1) #define BUF_SIZE 32 static int do_write(int fd, void *buf, int buf_size) { int ret; ret = write(fd, buf, buf_size); if (ret < 0) { perror("write"); return 0; } else if (ret != buf_size) { fprintf(stderr, "bad write size %d\n", ret); return 0; } return 1; } static void *thread_fn(void *data) { char w1[BUF_SIZE], w2[BUF_SIZE]; int *fds = data; memset(w1, 0x11, BUF_SIZE); memset(w2, 0x22, BUF_SIZE); if (!do_write(fds[1], w1, BUF_SIZE)) return NULL; if (!do_write(fds[1], w2, BUF_SIZE)) return NULL; usleep(100000); if (!do_write(fds[1], w1, BUF_SIZE)) return NULL; if (!do_write(fds[1], w2, BUF_SIZE)) return NULL; return NULL; } int main(int argc, char *argv[]) { struct io_uring_buf_ring *br; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; pthread_t thread; int i, ret, fds[2]; void *buf, *tret; if (argc > 1) return T_EXIT_SKIP; if (pipe(fds) < 0) { perror("pipe"); return T_EXIT_FAIL; } ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "queue_init: %d\n", ret); return T_EXIT_FAIL; } br = io_uring_setup_buf_ring(&ring, NR_BUFS, BGID, 0, &ret); if (!br) { if (ret == -EINVAL) return T_EXIT_SKIP; fprintf(stderr, "failed buffer ring %d\n", ret); return T_EXIT_FAIL; } buf = malloc(NR_BUFS * BUF_SIZE); for (i = 0; i < NR_BUFS; i++) { void *this_buf = buf + i * BUF_SIZE; io_uring_buf_ring_add(br, this_buf, BUF_SIZE, i, BR_MASK, i); } io_uring_buf_ring_advance(br, NR_BUFS); sqe = io_uring_get_sqe(&ring); io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BGID); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "bad submit %d\n", ret); return T_EXIT_FAIL; } /* * read multishot not available would be ready as a cqe when * submission returns, check and skip if not. 
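 * (a completion of -EINVAL or -EBADF here is taken to mean multishot
 * read is unsupported on this kernel, and the test is skipped)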
*/ ret = io_uring_peek_cqe(&ring, &cqe); if (!ret) { if (cqe->res == -EINVAL || cqe->res == -EBADF) return T_EXIT_SKIP; } pthread_create(&thread, NULL, thread_fn, fds); for (i = 0; i < 4; i++) { int buf_index; ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait %d\n", ret); break; } if (cqe->res != BUF_SIZE) { fprintf(stderr, "size %d\n", cqe->res); return T_EXIT_FAIL; } if (!(cqe->flags & IORING_CQE_F_BUFFER)) { fprintf(stderr, "buffer not set\n"); return T_EXIT_FAIL; } if (!(cqe->flags & IORING_CQE_F_MORE)) { fprintf(stderr, "more not set\n"); return T_EXIT_FAIL; } buf_index = cqe->flags >> 16; assert(buf_index >= 0 && buf_index <= NR_BUFS); io_uring_cqe_seen(&ring, cqe); } pthread_join(thread, &tret); return T_EXIT_PASS; } liburing-2.6/test/read-mshot.c000066400000000000000000000205201461424365000163660ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test multishot read (IORING_OP_READ_MULTISHOT) on pipes, * using ring provided buffers * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define BUF_SIZE 32 #define BUF_SIZE_FIRST 17 #define NR_BUFS 64 #define BUF_BGID 1 #define BR_MASK (NR_BUFS - 1) #define NR_OVERFLOW (NR_BUFS / 4) static int no_buf_ring, no_read_mshot; static int test_clamp(void) { struct io_uring_buf_ring *br; struct io_uring_params p = { }; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret, fds[2], i; char tmp[32]; char *buf; void *ptr; ret = io_uring_queue_init_params(4, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } if (pipe(fds) < 0) { perror("pipe"); return 1; } if (posix_memalign((void **) &buf, 4096, NR_BUFS * BUF_SIZE)) return 1; br = io_uring_setup_buf_ring(&ring, NR_BUFS, BUF_BGID, 0, &ret); if (!br) { if (ret == -EINVAL) { no_buf_ring = 1; return 0; } fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } ptr = buf; io_uring_buf_ring_add(br, buf, 16, 1, BR_MASK, 0); buf += 16; io_uring_buf_ring_add(br, buf, 32, 2, BR_MASK, 1); buf += 32; io_uring_buf_ring_add(br, buf, 32, 3, BR_MASK, 2); buf += 32; io_uring_buf_ring_add(br, buf, 32, 4, BR_MASK, 3); buf += 32; io_uring_buf_ring_advance(br, 4); memset(tmp, 0xaa, sizeof(tmp)); sqe = io_uring_get_sqe(&ring); io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BUF_BGID); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return 1; } /* prevent pipe buffer merging */ usleep(1000); ret = write(fds[1], tmp, 16); usleep(1000); ret = write(fds[1], tmp, sizeof(tmp)); /* prevent pipe buffer merging */ usleep(1000); ret = write(fds[1], tmp, 16); usleep(1000); ret = write(fds[1], tmp, sizeof(tmp)); /* * We should see a 16 byte completion, then a 32 byte, then a 16 byte, * and finally a 32 byte again. 
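 * (the usleep between writes keeps the pipe from merging them, so each
 * completion covers one write, clamped to the selected buffer's size)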
*/ for (i = 0; i < 4; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait cqe failed %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "cqe res: %d\n", cqe->res); return 1; } if (!(cqe->flags & IORING_CQE_F_MORE)) { fprintf(stderr, "no more cqes\n"); return 1; } if (i == 0 || i == 2) { if (cqe->res != 16) { fprintf(stderr, "%d cqe got %d\n", i, cqe->res); return 1; } } else if (i == 1 || i == 3) { if (cqe->res != 32) { fprintf(stderr, "%d cqe got %d\n", i, cqe->res); return 1; } } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); free(ptr); return 0; } static int test(int first_good, int async, int overflow) { struct io_uring_buf_ring *br; struct io_uring_params p = { }; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret, fds[2], i; char tmp[32]; void *ptr[NR_BUFS]; p.flags = IORING_SETUP_CQSIZE; if (!overflow) p.cq_entries = NR_BUFS + 1; else p.cq_entries = NR_OVERFLOW; ret = io_uring_queue_init_params(1, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } if (pipe(fds) < 0) { perror("pipe"); return 1; } br = io_uring_setup_buf_ring(&ring, NR_BUFS, BUF_BGID, 0, &ret); if (!br) { if (ret == -EINVAL) { no_buf_ring = 1; return 0; } fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } for (i = 0; i < NR_BUFS; i++) { unsigned size = i <= 1 ? BUF_SIZE_FIRST : BUF_SIZE; ptr[i] = malloc(size); if (!ptr[i]) return 1; io_uring_buf_ring_add(br, ptr[i], size, i + 1, BR_MASK, i); } io_uring_buf_ring_advance(br, NR_BUFS); if (first_good) { sprintf(tmp, "this is buffer %d\n", 0); ret = write(fds[1], tmp, strlen(tmp)); } sqe = io_uring_get_sqe(&ring); /* len == 0 means just use the defined provided buffer length */ io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BUF_BGID); if (async) sqe->flags |= IOSQE_ASYNC; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return 1; } /* write NR_BUFS + 1, or if first_good is set, NR_BUFS */ for (i = 0; i < NR_BUFS + !first_good; i++) { /* prevent pipe buffer merging */ usleep(1000); sprintf(tmp, "this is buffer %d\n", i + 1); ret = write(fds[1], tmp, strlen(tmp)); if (ret != strlen(tmp)) { fprintf(stderr, "write ret %d\n", ret); return 1; } } for (i = 0; i < NR_BUFS + 1; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait cqe failed %d\n", ret); return 1; } if (cqe->res < 0) { /* expected failure as we try to read one too many */ if (cqe->res == -ENOBUFS && i == NR_BUFS) break; if (!i && cqe->res == -EINVAL) { no_read_mshot = 1; break; } fprintf(stderr, "%d: cqe res %d\n", i, cqe->res); return 1; } else if (i > 9 && cqe->res <= 17) { fprintf(stderr, "truncated message %d %d\n", i, cqe->res); return 1; } if (!(cqe->flags & IORING_CQE_F_BUFFER)) { fprintf(stderr, "no buffer selected\n"); return 1; } if (!(cqe->flags & IORING_CQE_F_MORE)) { /* we expect this on overflow */ if (overflow && i >= NR_OVERFLOW) break; fprintf(stderr, "no more cqes\n"); return 1; } /* should've overflown! 
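 * (the CQ was sized to only NR_OVERFLOW entries, so making it past
 * NR_OVERFLOW completions with the MORE flag still set is a failure)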
*/ if (overflow && i > NR_OVERFLOW) { fprintf(stderr, "Expected overflow!\n"); return 1; } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); for (i = 0; i < NR_BUFS; i++) free(ptr[i]); return 0; } static int test_invalid(int async) { struct io_uring_buf_ring *br; struct io_uring_params p = { }; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; char fname[32] = ".mshot.%d.XXXXXX"; int ret, fd; char *buf; p.flags = IORING_SETUP_CQSIZE; p.cq_entries = NR_BUFS; ret = io_uring_queue_init_params(1, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } fd = mkstemp(fname); if (fd < 0) { perror("mkstemp"); return 1; } unlink(fname); if (posix_memalign((void **) &buf, 4096, BUF_SIZE)) return 1; br = io_uring_setup_buf_ring(&ring, 1, BUF_BGID, 0, &ret); if (!br) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } io_uring_buf_ring_add(br, buf, BUF_SIZE, 1, BR_MASK, 0); io_uring_buf_ring_advance(br, 1); sqe = io_uring_get_sqe(&ring); /* len == 0 means just use the defined provided buffer length */ io_uring_prep_read_multishot(sqe, fd, 0, 0, BUF_BGID); if (async) sqe->flags |= IOSQE_ASYNC; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait cqe failed %d\n", ret); return 1; } if (cqe->flags & IORING_CQE_F_MORE) { fprintf(stderr, "MORE flag set unexpected %d\n", cqe->flags); return 1; } if (cqe->res != -EBADFD) { fprintf(stderr, "Got cqe res %d, wanted -EBADFD\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); free(buf); return 0; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test(0, 0, 0); if (ret) { fprintf(stderr, "test 0 0 0 failed\n"); return T_EXIT_FAIL; } if (no_buf_ring || no_read_mshot) return T_EXIT_SKIP; ret = test(0, 1, 0); if (ret) { fprintf(stderr, "test 0 1 0, failed\n"); return T_EXIT_FAIL; } ret = test(1, 0, 0); if (ret) { fprintf(stderr, "test 1 0 0 failed\n"); return T_EXIT_FAIL; } ret = test(0, 0, 1); if (ret) { fprintf(stderr, "test 0 0 1 failed\n"); return T_EXIT_FAIL; } ret = test(0, 1, 1); if (ret) { fprintf(stderr, "test 0 1 1 failed\n"); return T_EXIT_FAIL; } ret = test(1, 0, 1); if (ret) { fprintf(stderr, "test 1 0 1, failed\n"); return T_EXIT_FAIL; } ret = test(1, 0, 1); if (ret) { fprintf(stderr, "test 1 0 1 failed\n"); return T_EXIT_FAIL; } ret = test(1, 1, 1); if (ret) { fprintf(stderr, "test 1 1 1 failed\n"); return T_EXIT_FAIL; } ret = test_invalid(0); if (ret) { fprintf(stderr, "test_invalid 0 failed\n"); return T_EXIT_FAIL; } ret = test_invalid(1); if (ret) { fprintf(stderr, "test_invalid 1 failed\n"); return T_EXIT_FAIL; } ret = test_clamp(); if (ret) { fprintf(stderr, "test_clamp failed\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/read-write.c000066400000000000000000000503271461424365000163760ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: basic read/write tests with buffered, O_DIRECT, and SQPOLL */ #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE (256 * 1024) #define BS 8192 #define BUFFERS (FILE_SIZE / BS) static struct iovec *vecs; static int no_read; static int no_buf_select; static int warned; static int create_nonaligned_buffers(void) { int i; vecs = t_malloc(BUFFERS * sizeof(struct iovec)); for (i = 0; i < BUFFERS; i++) { char *p = 
t_malloc(3 * BS); if (!p) return 1; vecs[i].iov_base = p + (rand() % BS); vecs[i].iov_len = 1 + (rand() % BS); } return 0; } static int __test_io(const char *file, struct io_uring *ring, int write, int buffered, int sqthread, int fixed, int nonvec, int buf_select, int seq, int exp_len) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int open_flags; int i, fd = -1, ret; off_t offset; #ifdef VERBOSE fprintf(stdout, "%s: start %d/%d/%d/%d/%d: ", __FUNCTION__, write, buffered, sqthread, fixed, nonvec); #endif if (write) open_flags = O_WRONLY; else open_flags = O_RDONLY; if (!buffered) open_flags |= O_DIRECT; if (fixed) { ret = t_register_buffers(ring, vecs, BUFFERS); if (ret == T_SETUP_SKIP) return 0; if (ret != T_SETUP_OK) { fprintf(stderr, "buffer reg failed: %d\n", ret); goto err; } } fd = open(file, open_flags); if (fd < 0) { if (errno == EINVAL) return 0; perror("file open"); goto err; } if (sqthread) { ret = io_uring_register_files(ring, &fd, 1); if (ret) { fprintf(stderr, "file reg failed: %d\n", ret); goto err; } } offset = 0; for (i = 0; i < BUFFERS; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } if (!seq) offset = BS * (rand() % BUFFERS); if (write) { int do_fixed = fixed; int use_fd = fd; if (sqthread) use_fd = 0; if (fixed && (i & 1)) do_fixed = 0; if (do_fixed) { io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base, vecs[i].iov_len, offset, i); } else if (nonvec) { io_uring_prep_write(sqe, use_fd, vecs[i].iov_base, vecs[i].iov_len, offset); } else { io_uring_prep_writev(sqe, use_fd, &vecs[i], 1, offset); } } else { int do_fixed = fixed; int use_fd = fd; if (sqthread) use_fd = 0; if (fixed && (i & 1)) do_fixed = 0; if (do_fixed) { io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base, vecs[i].iov_len, offset, i); } else if (nonvec) { io_uring_prep_read(sqe, use_fd, vecs[i].iov_base, vecs[i].iov_len, offset); } else { io_uring_prep_readv(sqe, use_fd, &vecs[i], 1, offset); } } sqe->user_data = i; if (sqthread) sqe->flags |= IOSQE_FIXED_FILE; if (buf_select) { if (nonvec) sqe->addr = 0; sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = buf_select; } if (seq) offset += BS; } ret = io_uring_submit(ring); if (ret != BUFFERS) { fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); goto err; } for (i = 0; i < BUFFERS; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); goto err; } if (cqe->res == -EINVAL && nonvec) { if (!warned) { fprintf(stdout, "Non-vectored IO not " "supported, skipping\n"); warned = 1; no_read = 1; } } else if (exp_len == -1) { int iov_len = vecs[cqe->user_data].iov_len; if (cqe->res != iov_len) { fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, iov_len); goto err; } } else if (cqe->res != exp_len) { fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len); goto err; } if (buf_select && exp_len == BS) { int bid = cqe->flags >> 16; unsigned char *ptr = vecs[bid].iov_base; int j; for (j = 0; j < BS; j++) { if (ptr[j] == cqe->user_data) continue; fprintf(stderr, "Data mismatch! 
bid=%d, " "wanted=%d, got=%d\n", bid, (int)cqe->user_data, ptr[j]); return 1; } } io_uring_cqe_seen(ring, cqe); } if (fixed) { ret = io_uring_unregister_buffers(ring); if (ret) { fprintf(stderr, "buffer unreg failed: %d\n", ret); goto err; } } if (sqthread) { ret = io_uring_unregister_files(ring); if (ret) { fprintf(stderr, "file unreg failed: %d\n", ret); goto err; } } close(fd); #ifdef VERBOSE fprintf(stdout, "PASS\n"); #endif return 0; err: #ifdef VERBOSE fprintf(stderr, "FAILED\n"); #endif if (fd != -1) close(fd); return 1; } static int test_io(const char *file, int write, int buffered, int sqthread, int fixed, int nonvec, int exp_len) { struct io_uring ring; int ret, ring_flags = 0; if (sqthread) ring_flags = IORING_SETUP_SQPOLL; ret = t_create_ring(64, &ring, ring_flags); if (ret == T_SETUP_SKIP) return 0; if (ret != T_SETUP_OK) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } ret = __test_io(file, &ring, write, buffered, sqthread, fixed, nonvec, 0, 0, exp_len); io_uring_queue_exit(&ring); return ret; } static int read_poll_link(const char *file) { struct __kernel_timespec ts; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int i, fd, ret, fds[2]; ret = io_uring_queue_init(8, &ring, 0); if (ret) return ret; fd = open(file, O_WRONLY); if (fd < 0) { perror("open"); return 1; } if (pipe(fds)) { perror("pipe"); return 1; } sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, fd, &vecs[0], 1, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, fds[0], POLLIN); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 2; ts.tv_sec = 1; ts.tv_nsec = 0; sqe = io_uring_get_sqe(&ring); io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 3; ret = io_uring_submit(&ring); if (ret != 3) { fprintf(stderr, "submitted %d\n", ret); return 1; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } io_uring_cqe_seen(&ring, cqe); } return 0; } static int has_nonvec_read(void) { struct io_uring_probe *p; struct io_uring ring; int ret; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); exit(ret); } p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op)); ret = io_uring_register_probe(&ring, p, 256); /* if we don't have PROBE_REGISTER, we don't have OP_READ/WRITE */ if (ret == -EINVAL) { out: io_uring_queue_exit(&ring); return 0; } else if (ret) { fprintf(stderr, "register_probe: %d\n", ret); goto out; } if (p->ops_len <= IORING_OP_READ) goto out; if (!(p->ops[IORING_OP_READ].flags & IO_URING_OP_SUPPORTED)) goto out; io_uring_queue_exit(&ring); return 1; } static int test_eventfd_read(void) { struct io_uring ring; int fd, ret; eventfd_t event; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; if (no_read) return 0; ret = io_uring_queue_init(8, &ring, 0); if (ret) return ret; fd = eventfd(1, 0); if (fd < 0) { perror("eventfd"); return 1; } sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fd, &event, sizeof(eventfd_t), 0); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submitted %d\n", ret); return 1; } eventfd_write(fd, 1); ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } if (cqe->res == -EINVAL) { fprintf(stdout, "eventfd IO not supported, skipping\n"); } else if (cqe->res != sizeof(eventfd_t)) { fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, (int) sizeof(eventfd_t)); return 1; } 
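/* retire the CQE so its slot in the CQ ring can be reused */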
io_uring_cqe_seen(&ring, cqe); return 0; } static int test_buf_select_short(const char *filename, int nonvec) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret, i, exp_len; if (no_buf_select) return 0; ret = io_uring_queue_init(64, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } exp_len = 0; for (i = 0; i < BUFFERS; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_provide_buffers(sqe, vecs[i].iov_base, vecs[i].iov_len / 2, 1, 1, i); if (!exp_len) exp_len = vecs[i].iov_len / 2; } ret = io_uring_submit(&ring); if (ret != BUFFERS) { fprintf(stderr, "submit: %d\n", ret); return -1; } for (i = 0; i < BUFFERS; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (cqe->res < 0) { fprintf(stderr, "cqe->res=%d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); } ret = __test_io(filename, &ring, 0, 0, 0, 0, nonvec, 1, 1, exp_len); io_uring_queue_exit(&ring); return ret; } static int provide_buffers_iovec(struct io_uring *ring, int bgid) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int i, ret; for (i = 0; i < BUFFERS; i++) { sqe = io_uring_get_sqe(ring); io_uring_prep_provide_buffers(sqe, vecs[i].iov_base, vecs[i].iov_len, 1, bgid, i); } ret = io_uring_submit(ring); if (ret != BUFFERS) { fprintf(stderr, "submit: %d\n", ret); return -1; } for (i = 0; i < BUFFERS; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "cqe->res=%d\n", cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); } return 0; } static int test_buf_select_pipe(void) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret, i; int fds[2]; if (no_buf_select) return 0; ret = io_uring_queue_init(64, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } ret = provide_buffers_iovec(&ring, 0); if (ret) { fprintf(stderr, "provide buffers failed: %d\n", ret); return 1; } ret = pipe(fds); if (ret) { fprintf(stderr, "pipe failed: %d\n", ret); return 1; } for (i = 0; i < 5; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fds[0], NULL, 1 /* max read 1 per go */, -1); sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = 0; } io_uring_submit(&ring); ret = write(fds[1], "01234", 5); if (ret != 5) { fprintf(stderr, "pipe write failed %d\n", ret); return 1; } for (i = 0; i < 5; i++) { const char *buff; if (io_uring_wait_cqe(&ring, &cqe)) { fprintf(stderr, "bad wait %d\n", i); return 1; } if (cqe->res != 1) { fprintf(stderr, "expected read %d\n", cqe->res); return 1; } if (!(cqe->flags & IORING_CQE_F_BUFFER)) { fprintf(stderr, "no buffer %d\n", cqe->res); return 1; } buff = vecs[cqe->flags >> 16].iov_base; if (*buff != '0' + i) { fprintf(stderr, "%d: expected %c, got %c\n", i, '0' + i, *buff); return 1; } io_uring_cqe_seen(&ring, cqe); } close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring); return 0; } static int test_buf_select(const char *filename, int nonvec) { struct io_uring_probe *p; struct io_uring ring; int ret, i; ret = io_uring_queue_init(64, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } p = io_uring_get_probe_ring(&ring); if (!p || !io_uring_opcode_supported(p, IORING_OP_PROVIDE_BUFFERS)) { no_buf_select = 1; fprintf(stdout, "Buffer select not supported, skipping\n"); return 0; } io_uring_free_probe(p); /* * Write out data with known pattern */ for (i = 0; i < BUFFERS; i++) memset(vecs[i].iov_base, i, vecs[i].iov_len); ret = __test_io(filename, &ring, 1, 0, 0, 
0, 0, 0, 1, BS); if (ret) { fprintf(stderr, "failed writing data\n"); return 1; } for (i = 0; i < BUFFERS; i++) memset(vecs[i].iov_base, 0x55, vecs[i].iov_len); ret = provide_buffers_iovec(&ring, 1); if (ret) return ret; ret = __test_io(filename, &ring, 0, 0, 0, 0, nonvec, 1, 1, BS); io_uring_queue_exit(&ring); return ret; } static int test_rem_buf(int batch, int sqe_flags) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int left, ret, nr = 0; int bgid = 1; if (no_buf_select) return 0; ret = io_uring_queue_init(64, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } ret = provide_buffers_iovec(&ring, bgid); if (ret) return ret; left = BUFFERS; while (left) { int to_rem = (left < batch) ? left : batch; left -= to_rem; sqe = io_uring_get_sqe(&ring); io_uring_prep_remove_buffers(sqe, to_rem, bgid); sqe->user_data = to_rem; sqe->flags |= sqe_flags; ++nr; } ret = io_uring_submit(&ring); if (ret != nr) { fprintf(stderr, "submit: %d\n", ret); return -1; } for (; nr > 0; nr--) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } if (cqe->res != cqe->user_data) { fprintf(stderr, "cqe->res=%d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); return ret; } static int test_rem_buf_single(int to_rem) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret, expected; int bgid = 1; if (no_buf_select) return 0; ret = io_uring_queue_init(64, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } ret = provide_buffers_iovec(&ring, bgid); if (ret) return ret; expected = (to_rem > BUFFERS) ? BUFFERS : to_rem; sqe = io_uring_get_sqe(&ring); io_uring_prep_remove_buffers(sqe, to_rem, bgid); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return -1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } if (cqe->res != expected) { fprintf(stderr, "cqe->res=%d, expected=%d\n", cqe->res, expected); return 1; } io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return ret; } static int test_io_link(const char *file) { const int nr_links = 100; const int link_len = 100; const int nr_sqes = nr_links * link_len; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int i, j, fd, ret; fd = open(file, O_WRONLY); if (fd < 0) { perror("file open"); goto err; } ret = io_uring_queue_init(nr_sqes, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); goto err; } for (i = 0; i < nr_links; ++i) { for (j = 0; j < link_len; ++j) { sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } io_uring_prep_writev(sqe, fd, &vecs[0], 1, 0); sqe->flags |= IOSQE_ASYNC; if (j != link_len - 1) sqe->flags |= IOSQE_IO_LINK; } } ret = io_uring_submit(&ring); if (ret != nr_sqes) { ret = io_uring_peek_cqe(&ring, &cqe); if (!ret && cqe->res == -EINVAL) { fprintf(stdout, "IOSQE_ASYNC not supported, skipped\n"); goto out; } fprintf(stderr, "submit got %d, wanted %d\n", ret, nr_sqes); goto err; } for (i = 0; i < nr_sqes; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); goto err; } if (cqe->res == -EINVAL) { if (!warned) { fprintf(stdout, "Non-vectored IO not " "supported, skipping\n"); warned = 1; no_read = 1; } } else if (cqe->res != BS) { fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, BS); goto err; } io_uring_cqe_seen(&ring, cqe); } 
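/* "out" is reached on success, and jumped to directly when IOSQE_ASYNC
 * is reported unsupported and the test is skipped */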
out: io_uring_queue_exit(&ring); close(fd); return 0; err: if (fd != -1) close(fd); return 1; } static int test_write_efbig(void) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; struct rlimit rlim, old_rlim; int i, fd, ret; loff_t off; if (geteuid()) { fprintf(stdout, "Not root, skipping %s\n", __FUNCTION__); return 0; } if (getrlimit(RLIMIT_FSIZE, &old_rlim) < 0) { perror("getrlimit"); return 1; } rlim = old_rlim; rlim.rlim_cur = 128 * 1024; rlim.rlim_max = 128 * 1024; if (setrlimit(RLIMIT_FSIZE, &rlim) < 0) { perror("setrlimit"); return 1; } fd = open(".efbig", O_WRONLY | O_CREAT, 0644); if (fd < 0) { perror("file open"); goto err; } unlink(".efbig"); ret = io_uring_queue_init(32, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); goto err; } off = 0; for (i = 0; i < 32; i++) { sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } io_uring_prep_writev(sqe, fd, &vecs[i], 1, off); io_uring_sqe_set_data64(sqe, i); off += BS; } ret = io_uring_submit(&ring); if (ret != 32) { fprintf(stderr, "submit got %d, wanted %d\n", ret, 32); goto err; } for (i = 0; i < 32; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); goto err; } if (cqe->user_data < 16) { if (cqe->res != BS) { fprintf(stderr, "bad write: %d\n", cqe->res); goto err; } } else { if (cqe->res != -EFBIG) { fprintf(stderr, "Expected -EFBIG: %d\n", cqe->res); goto err; } } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); close(fd); unlink(".efbig"); if (setrlimit(RLIMIT_FSIZE, &old_rlim) < 0) { perror("setrlimit"); return 1; } return 0; err: if (fd != -1) close(fd); return 1; } int main(int argc, char *argv[]) { int i, ret, nr; char buf[256]; char *fname; if (argc > 1) { fname = argv[1]; } else { srand((unsigned)time(NULL)); snprintf(buf, sizeof(buf), ".basic-rw-%u-%u", (unsigned)rand(), (unsigned)getpid()); fname = buf; t_create_file(fname, FILE_SIZE); } signal(SIGXFSZ, SIG_IGN); vecs = t_create_buffers(BUFFERS, BS); /* if we don't have nonvec read, skip testing that */ nr = has_nonvec_read() ? 
32 : 16; for (i = 0; i < nr; i++) { int write = (i & 1) != 0; int buffered = (i & 2) != 0; int sqthread = (i & 4) != 0; int fixed = (i & 8) != 0; int nonvec = (i & 16) != 0; ret = test_io(fname, write, buffered, sqthread, fixed, nonvec, BS); if (ret) { fprintf(stderr, "test_io failed %d/%d/%d/%d/%d\n", write, buffered, sqthread, fixed, nonvec); goto err; } } ret = test_buf_select(fname, 1); if (ret) { fprintf(stderr, "test_buf_select nonvec failed\n"); goto err; } ret = test_buf_select(fname, 0); if (ret) { fprintf(stderr, "test_buf_select vec failed\n"); goto err; } ret = test_buf_select_short(fname, 1); if (ret) { fprintf(stderr, "test_buf_select_short nonvec failed\n"); goto err; } ret = test_buf_select_short(fname, 0); if (ret) { fprintf(stderr, "test_buf_select_short vec failed\n"); goto err; } ret = test_buf_select_pipe(); if (ret) { fprintf(stderr, "test_buf_select_pipe failed\n"); goto err; } ret = test_eventfd_read(); if (ret) { fprintf(stderr, "test_eventfd_read failed\n"); goto err; } ret = read_poll_link(fname); if (ret) { fprintf(stderr, "read_poll_link failed\n"); goto err; } ret = test_io_link(fname); if (ret) { fprintf(stderr, "test_io_link failed\n"); goto err; } ret = test_write_efbig(); if (ret) { fprintf(stderr, "test_write_efbig failed\n"); goto err; } ret = test_rem_buf(1, 0); if (ret) { fprintf(stderr, "test_rem_buf by 1 failed\n"); goto err; } ret = test_rem_buf(10, 0); if (ret) { fprintf(stderr, "test_rem_buf by 10 failed\n"); goto err; } ret = test_rem_buf(2, IOSQE_IO_LINK); if (ret) { fprintf(stderr, "test_rem_buf link failed\n"); goto err; } ret = test_rem_buf(2, IOSQE_ASYNC); if (ret) { fprintf(stderr, "test_rem_buf async failed\n"); goto err; } srand((unsigned)time(NULL)); if (create_nonaligned_buffers()) { fprintf(stderr, "file creation failed\n"); goto err; } /* test fixed bufs with non-aligned len/offset */ for (i = 0; i < nr; i++) { int write = (i & 1) != 0; int buffered = (i & 2) != 0; int sqthread = (i & 4) != 0; int fixed = (i & 8) != 0; int nonvec = (i & 16) != 0; /* direct IO requires alignment, skip it */ if (!buffered || !fixed || nonvec) continue; ret = test_io(fname, write, buffered, sqthread, fixed, nonvec, -1); if (ret) { fprintf(stderr, "test_io failed %d/%d/%d/%d/%d\n", write, buffered, sqthread, fixed, nonvec); goto err; } } ret = test_rem_buf_single(BUFFERS + 1); if (ret) { fprintf(stderr, "test_rem_buf_single(BUFFERS + 1) failed\n"); goto err; } if (fname != argv[1]) unlink(fname); return 0; err: if (fname != argv[1]) unlink(fname); return 1; } liburing-2.6/test/recv-msgall-stream.c000066400000000000000000000166151461424365000200420ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test MSG_WAITALL for recv/recvmsg and include normal sync versions just * for comparison. 
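 *
 * MSG_WAITALL tells the kernel not to complete a stream receive until
 * the full requested length has arrived (short of EOF, an error or a
 * signal). do_send() deliberately writes the payload in two halves
 * with a delay in between, so a short completion means the flag was
 * not honoured. A minimal sketch of the plain-socket behaviour being
 * mirrored, assuming a connected stream socket `fd`:
 *
 *	char buf[512];
 *	ssize_t n = recv(fd, buf, sizeof(buf), MSG_WAITALL);
 *	// expect n == sizeof(buf) unless EOF/error/signal cut it short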
*/ #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <stdint.h> #include <assert.h> #include <pthread.h> #include <arpa/inet.h> #include <sys/types.h> #include <sys/socket.h> #include "liburing.h" #include "helpers.h" #define MAX_MSG 128 struct recv_data { pthread_mutex_t mutex; int use_recvmsg; int use_sync; __be16 port; }; static int get_conn_sock(struct recv_data *rd, int *sockout) { struct sockaddr_in saddr; int sockfd, ret, val; memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = htonl(INADDR_ANY); sockfd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); if (sockfd < 0) { perror("socket"); goto err; } val = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); setsockopt(sockfd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); if (t_bind_ephemeral_port(sockfd, &saddr)) { perror("bind"); goto err; } rd->port = saddr.sin_port; ret = listen(sockfd, 16); if (ret < 0) { perror("listen"); goto err; } pthread_mutex_unlock(&rd->mutex); ret = accept(sockfd, NULL, NULL); if (ret < 0) { perror("accept"); return -1; } *sockout = sockfd; return ret; err: pthread_mutex_unlock(&rd->mutex); return -1; } static int recv_prep(struct io_uring *ring, struct iovec *iov, int *sock, struct recv_data *rd) { struct io_uring_sqe *sqe; struct msghdr msg = { }; int sockfd, sockout = -1, ret; sockfd = get_conn_sock(rd, &sockout); if (sockfd < 0) goto err; sqe = io_uring_get_sqe(ring); if (!rd->use_recvmsg) { io_uring_prep_recv(sqe, sockfd, iov->iov_base, iov->iov_len, MSG_WAITALL); } else { msg.msg_namelen = sizeof(struct sockaddr_in); msg.msg_iov = iov; msg.msg_iovlen = 1; io_uring_prep_recvmsg(sqe, sockfd, &msg, MSG_WAITALL); } sqe->user_data = 2; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } *sock = sockfd; return 0; err: if (sockout != -1) { shutdown(sockout, SHUT_RDWR); close(sockout); } if (sockfd != -1) { shutdown(sockfd, SHUT_RDWR); close(sockfd); } return 1; } static int do_recv(struct io_uring *ring) { struct io_uring_cqe *cqe; int ret; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stdout, "wait_cqe: %d\n", ret); goto err; } if (cqe->res == -EINVAL) { fprintf(stdout, "recv not supported, skipping\n"); return 0; } if (cqe->res < 0) { fprintf(stderr, "failed cqe: %d\n", cqe->res); goto err; } if (cqe->res != MAX_MSG * sizeof(int)) { fprintf(stderr, "got wrong length: %d\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); return 0; err: return 1; } static int recv_sync(struct recv_data *rd) { int buf[MAX_MSG]; struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf), }; int i, ret, sockfd, sockout = -1; sockfd = get_conn_sock(rd, &sockout); if (rd->use_recvmsg) { struct msghdr msg = { }; msg.msg_namelen = sizeof(struct sockaddr_in); msg.msg_iov = &iov; msg.msg_iovlen = 1; ret = recvmsg(sockfd, &msg, MSG_WAITALL); } else { ret = recv(sockfd, buf, sizeof(buf), MSG_WAITALL); } if (ret < 0) { perror("receive"); goto err; } if (ret != sizeof(buf)) { ret = -1; goto err; } for (i = 0; i < MAX_MSG; i++) { if (buf[i] != i) goto err; } ret = 0; err: shutdown(sockout, SHUT_RDWR); shutdown(sockfd, SHUT_RDWR); close(sockout); close(sockfd); return ret; } static int recv_uring(struct recv_data *rd) { int buf[MAX_MSG]; struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf), }; struct io_uring_params p = { }; struct io_uring ring; int ret, sock = -1, sockout = -1; ret = t_create_ring_params(1, &ring, &p); if (ret == T_SETUP_SKIP) { pthread_mutex_unlock(&rd->mutex); ret = 0; goto err; } else if (ret < 0) { pthread_mutex_unlock(&rd->mutex); goto err; } ret =
recv_prep(&ring, &iov, &sock, rd); if (ret) { fprintf(stderr, "recv_prep failed: %d\n", ret); goto err; } ret = do_recv(&ring); if (!ret) { int i; for (i = 0; i < MAX_MSG; i++) { if (buf[i] != i) { fprintf(stderr, "found %d at %d\n", buf[i], i); ret = 1; break; } } } io_uring_queue_exit(&ring); err: if (sock != -1) { shutdown(sock, SHUT_RDWR); close(sock); } if (sockout != -1) { shutdown(sockout, SHUT_RDWR); close(sockout); } return ret; } static void *recv_fn(void *data) { struct recv_data *rd = data; if (rd->use_sync) return (void *) (uintptr_t) recv_sync(rd); return (void *) (uintptr_t) recv_uring(rd); } static int do_send(struct recv_data *rd) { struct sockaddr_in saddr; struct io_uring ring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int sockfd, ret, i; struct iovec iov; int *buf; ret = io_uring_queue_init(2, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return 1; } buf = malloc(MAX_MSG * sizeof(int)); for (i = 0; i < MAX_MSG; i++) buf[i] = i; sockfd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); if (sockfd < 0) { perror("socket"); return 1; } pthread_mutex_lock(&rd->mutex); assert(rd->port != 0); memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_port = rd->port; inet_pton(AF_INET, "127.0.0.1", &saddr.sin_addr); ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); if (ret < 0) { perror("connect"); return 1; } iov.iov_base = buf; iov.iov_len = MAX_MSG * sizeof(int) / 2; for (i = 0; i < 2; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_send(sqe, sockfd, iov.iov_base, iov.iov_len, 0); sqe->user_data = 1; ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } usleep(10000); iov.iov_base += iov.iov_len; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (cqe->res == -EINVAL) { fprintf(stdout, "send not supported, skipping\n"); close(sockfd); return 0; } if (cqe->res != iov.iov_len) { fprintf(stderr, "failed cqe: %d\n", cqe->res); goto err; } io_uring_cqe_seen(&ring, cqe); } shutdown(sockfd, SHUT_RDWR); close(sockfd); return 0; err: shutdown(sockfd, SHUT_RDWR); close(sockfd); return 1; } static int test(int use_recvmsg, int use_sync) { pthread_mutexattr_t attr; pthread_t recv_thread; struct recv_data rd; int ret; void *retval; pthread_mutexattr_init(&attr); pthread_mutexattr_setpshared(&attr, 1); pthread_mutex_init(&rd.mutex, &attr); pthread_mutex_lock(&rd.mutex); rd.use_recvmsg = use_recvmsg; rd.use_sync = use_sync; rd.port = 0; ret = pthread_create(&recv_thread, NULL, recv_fn, &rd); if (ret) { fprintf(stderr, "Thread create failed: %d\n", ret); pthread_mutex_unlock(&rd.mutex); return 1; } do_send(&rd); pthread_join(recv_thread, &retval); return (intptr_t)retval; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test(0, 0); if (ret) { fprintf(stderr, "test recv failed\n"); return ret; } ret = test(1, 0); if (ret) { fprintf(stderr, "test recvmsg failed\n"); return ret; } ret = test(0, 1); if (ret) { fprintf(stderr, "test sync recv failed\n"); return ret; } ret = test(1, 1); if (ret) { fprintf(stderr, "test sync recvmsg failed\n"); return ret; } return 0; } liburing-2.6/test/recv-msgall.c000066400000000000000000000120161461424365000165400ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test MSG_WAITALL with datagram sockets, with a send split into two.
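 *
 * For SOCK_DGRAM, MSG_WAITALL cannot coalesce datagrams: a receive
 * completes with at most one datagram. Since the payload is sent as two
 * halves, the receive below is expected to complete with exactly
 * MAX_MSG * sizeof(int) / 2 bytes -- one datagram -- which is what
 * do_recv() checks for.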
*/ #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define MAX_MSG 128 #define HOST "127.0.0.1" static __be16 bind_port; struct recv_data { pthread_barrier_t barrier; int use_recvmsg; struct msghdr msg; }; static int recv_prep(struct io_uring *ring, struct iovec *iov, int *sock, struct recv_data *rd) { struct sockaddr_in saddr; struct io_uring_sqe *sqe; int sockfd, ret, val; memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = htonl(INADDR_ANY); sockfd = socket(AF_INET, SOCK_DGRAM, 0); if (sockfd < 0) { perror("socket"); return 1; } val = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); if (t_bind_ephemeral_port(sockfd, &saddr)) { perror("bind"); goto err; } bind_port = saddr.sin_port; sqe = io_uring_get_sqe(ring); if (!rd->use_recvmsg) { io_uring_prep_recv(sqe, sockfd, iov->iov_base, iov->iov_len, MSG_WAITALL); } else { struct msghdr *msg = &rd->msg; memset(msg, 0, sizeof(*msg)); msg->msg_namelen = sizeof(struct sockaddr_in); msg->msg_iov = iov; msg->msg_iovlen = 1; io_uring_prep_recvmsg(sqe, sockfd, msg, MSG_WAITALL); } sqe->user_data = 2; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } *sock = sockfd; return 0; err: close(sockfd); return 1; } static int do_recv(struct io_uring *ring) { struct io_uring_cqe *cqe; int ret; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stdout, "wait_cqe: %d\n", ret); goto err; } if (cqe->res == -EINVAL) { fprintf(stdout, "recv not supported, skipping\n"); return 0; } if (cqe->res < 0) { fprintf(stderr, "failed cqe: %d\n", cqe->res); goto err; } if (cqe->res != MAX_MSG * sizeof(int) / 2) { fprintf(stderr, "got wrong length: %d\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); return 0; err: return 1; } static void *recv_fn(void *data) { struct recv_data *rd = data; int buf[MAX_MSG]; struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf), }; struct io_uring_params p = { }; struct io_uring ring; int ret, sock; ret = t_create_ring_params(1, &ring, &p); if (ret == T_SETUP_SKIP) { pthread_barrier_wait(&rd->barrier); ret = 0; goto err; } else if (ret < 0) { pthread_barrier_wait(&rd->barrier); goto err; } ret = recv_prep(&ring, &iov, &sock, rd); if (ret) { fprintf(stderr, "recv_prep failed: %d\n", ret); goto err; } pthread_barrier_wait(&rd->barrier); ret = do_recv(&ring); close(sock); io_uring_queue_exit(&ring); err: return (void *)(intptr_t)ret; } static int do_send(void) { struct sockaddr_in saddr; struct io_uring ring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int sockfd, ret, i; struct iovec iov; int *buf; ret = io_uring_queue_init(2, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return 1; } buf = malloc(MAX_MSG * sizeof(int)); for (i = 0; i < MAX_MSG; i++) buf[i] = i; memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_port = bind_port; inet_pton(AF_INET, HOST, &saddr.sin_addr); sockfd = socket(AF_INET, SOCK_DGRAM, 0); if (sockfd < 0) { perror("socket"); return 1; } ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); if (ret < 0) { perror("connect"); return 1; } iov.iov_base = buf; iov.iov_len = MAX_MSG * sizeof(int) / 2; for (i = 0; i < 2; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_send(sqe, sockfd, iov.iov_base, iov.iov_len, 0); sqe->user_data = 1; ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } usleep(10000); iov.iov_base += 
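/* step past the half just sent; the second send becomes its own datagram */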
iov.iov_len; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (cqe->res == -EINVAL) { fprintf(stdout, "send not supported, skipping\n"); close(sockfd); return 0; } if (cqe->res != iov.iov_len) { fprintf(stderr, "failed cqe: %d\n", cqe->res); goto err; } io_uring_cqe_seen(&ring, cqe); } close(sockfd); return 0; err: close(sockfd); return 1; } static int test(int use_recvmsg) { pthread_t recv_thread; struct recv_data rd; int ret; void *retval; pthread_barrier_init(&rd.barrier, NULL, 2); rd.use_recvmsg = use_recvmsg; ret = pthread_create(&recv_thread, NULL, recv_fn, &rd); if (ret) { fprintf(stderr, "Thread create failed: %d\n", ret); return 1; } pthread_barrier_wait(&rd.barrier); do_send(); pthread_join(recv_thread, &retval); pthread_barrier_destroy(&rd.barrier); return (intptr_t)retval; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test(0); if (ret) { fprintf(stderr, "test recv failed\n"); return ret; } ret = test(1); if (ret) { fprintf(stderr, "test recvmsg failed\n"); return ret; } return 0; } liburing-2.6/test/recv-multishot.c000066400000000000000000000341111461424365000173110ustar00rootroot00000000000000// SPDX-License-Identifier: MIT #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define ENORECVMULTISHOT 9999 enum early_error_t { ERROR_NONE = 0, ERROR_NOT_ENOUGH_BUFFERS, ERROR_EARLY_CLOSE_SENDER, ERROR_EARLY_CLOSE_RECEIVER, ERROR_EARLY_OVERFLOW, ERROR_EARLY_LAST }; struct args { bool stream; bool wait_each; bool recvmsg; enum early_error_t early_error; bool defer; }; static int check_sockaddr(struct sockaddr_in *in) { struct in_addr expected; inet_pton(AF_INET, "127.0.0.1", &expected); if (in->sin_family != AF_INET) { fprintf(stderr, "bad family %d\n", (int)htons(in->sin_family)); return -1; } if (memcmp(&expected, &in->sin_addr, sizeof(in->sin_addr))) { char buff[256]; const char *addr = inet_ntop(AF_INET, &in->sin_addr, buff, sizeof(buff)); fprintf(stderr, "unexpected address %s\n", addr ? addr : "INVALID"); return -1; } return 0; } static int test(struct args *args) { int const N = 8; int const N_BUFFS = N * 64; int const N_CQE_OVERFLOW = 4; int const min_cqes = args->early_error ? 
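/* runs that inject an early error may legitimately stop after a couple of completions */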
2 : 8; int const NAME_LEN = sizeof(struct sockaddr_storage); int const CONTROL_LEN = CMSG_ALIGN(sizeof(struct sockaddr_storage)) + sizeof(struct cmsghdr); struct io_uring ring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int fds[2], ret, i, j; int total_sent_bytes = 0, total_recv_bytes = 0, total_dropped_bytes = 0; int send_buff[256]; int *sent_buffs[N_BUFFS]; int *recv_buffs[N_BUFFS]; int *at; struct io_uring_cqe recv_cqe[N_BUFFS]; int recv_cqes = 0; bool early_error = false; bool early_error_started = false; struct __kernel_timespec timeout = { .tv_sec = 1, }; struct msghdr msg; struct io_uring_params params = { }; int n_sqe = 32; memset(recv_buffs, 0, sizeof(recv_buffs)); if (args->defer) params.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN; if (args->early_error == ERROR_EARLY_OVERFLOW) { params.flags |= IORING_SETUP_CQSIZE; params.cq_entries = N_CQE_OVERFLOW; n_sqe = N_CQE_OVERFLOW; } ret = io_uring_queue_init_params(n_sqe, &ring, ¶ms); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return ret; } ret = t_create_socket_pair(fds, args->stream); if (ret) { fprintf(stderr, "t_create_socket_pair failed: %d\n", ret); return ret; } if (!args->stream) { bool val = true; /* force some cmsgs to come back to us */ ret = setsockopt(fds[0], IPPROTO_IP, IP_RECVORIGDSTADDR, &val, sizeof(val)); if (ret) { fprintf(stderr, "setsockopt failed %d\n", errno); goto cleanup; } } for (i = 0; i < ARRAY_SIZE(send_buff); i++) send_buff[i] = i; for (i = 0; i < ARRAY_SIZE(recv_buffs); i++) { /* prepare some different sized buffers */ int buffer_size = (i % 2 == 0 && (args->stream || args->recvmsg)) ? 1 : N; buffer_size *= sizeof(int); if (args->recvmsg) { buffer_size += sizeof(struct io_uring_recvmsg_out) + NAME_LEN + CONTROL_LEN; } recv_buffs[i] = malloc(buffer_size); if (i > 2 && args->early_error == ERROR_NOT_ENOUGH_BUFFERS) continue; sqe = io_uring_get_sqe(&ring); io_uring_prep_provide_buffers(sqe, recv_buffs[i], buffer_size, 1, 7, i); io_uring_sqe_set_data64(sqe, 0x999); memset(recv_buffs[i], 0xcc, buffer_size); if (io_uring_submit_and_wait_timeout(&ring, &cqe, 1, &timeout, NULL) < 0) { fprintf(stderr, "provide buffers failed: %d\n", ret); ret = -1; goto cleanup; } io_uring_cqe_seen(&ring, cqe); } sqe = io_uring_get_sqe(&ring); if (args->recvmsg) { unsigned int flags = 0; if (!args->stream) flags |= MSG_TRUNC; memset(&msg, 0, sizeof(msg)); msg.msg_namelen = NAME_LEN; msg.msg_controllen = CONTROL_LEN; io_uring_prep_recvmsg_multishot(sqe, fds[0], &msg, flags); } else { io_uring_prep_recv_multishot(sqe, fds[0], NULL, 0, 0); } sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = 7; io_uring_sqe_set_data64(sqe, 1234); io_uring_submit(&ring); at = &send_buff[0]; total_sent_bytes = 0; for (i = 0; i < N; i++) { int to_send = sizeof(*at) * (i+1); total_sent_bytes += to_send; sent_buffs[i] = at; if (send(fds[1], at, to_send, 0) != to_send) { if (early_error_started) break; fprintf(stderr, "send failed %d\n", errno); ret = -1; goto cleanup; } if (i == 2) { if (args->early_error == ERROR_EARLY_CLOSE_RECEIVER) { /* allow previous sends to complete */ usleep(1000); io_uring_get_events(&ring); sqe = io_uring_get_sqe(&ring); io_uring_prep_recv(sqe, fds[0], NULL, 0, 0); io_uring_prep_cancel64(sqe, 1234, 0); io_uring_sqe_set_data64(sqe, 0x888); sqe->flags |= IOSQE_CQE_SKIP_SUCCESS; io_uring_submit(&ring); early_error_started = true; /* allow the cancel to complete */ usleep(1000); io_uring_get_events(&ring); } if (args->early_error == ERROR_EARLY_CLOSE_SENDER) { early_error_started = 
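/* from here on, send() failing is the expected outcome */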
true; shutdown(fds[1], SHUT_RDWR); close(fds[1]); } } at += (i+1); if (args->wait_each) { ret = io_uring_wait_cqes(&ring, &cqe, 1, &timeout, NULL); if (ret) { fprintf(stderr, "wait_each failed: %d\n", ret); ret = -1; goto cleanup; } while (io_uring_peek_cqe(&ring, &cqe) == 0) { recv_cqe[recv_cqes++] = *cqe; if (cqe->flags & IORING_CQE_F_MORE) { io_uring_cqe_seen(&ring, cqe); } else { early_error = true; io_uring_cqe_seen(&ring, cqe); } } if (early_error) break; } } close(fds[1]); /* allow sends to finish */ usleep(1000); if ((args->stream && !early_error) || recv_cqes < min_cqes) { unsigned int to_wait = 1; if (recv_cqes < min_cqes) to_wait = min_cqes - recv_cqes; ret = io_uring_wait_cqes(&ring, &cqe, to_wait, &timeout, NULL); if (ret && ret != -ETIME) { fprintf(stderr, "wait final failed: %d\n", ret); ret = -1; goto cleanup; } } while (io_uring_peek_cqe(&ring, &cqe) == 0) { recv_cqe[recv_cqes++] = *cqe; io_uring_cqe_seen(&ring, cqe); } ret = -1; at = &send_buff[0]; if (recv_cqes < min_cqes) { if (recv_cqes > 0 && recv_cqe[0].res == -EINVAL) { return -ENORECVMULTISHOT; } /* some kernels apparently don't check ->ioprio, skip */ ret = -ENORECVMULTISHOT; goto cleanup; } for (i = 0; i < recv_cqes; i++) { cqe = &recv_cqe[i]; bool const is_last = i == recv_cqes - 1; /* * Older kernels could terminate multishot early due to overflow, * but later ones will not. So discriminate based on the MORE flag. */ bool const early_last = args->early_error == ERROR_EARLY_OVERFLOW && !args->wait_each && i >= N_CQE_OVERFLOW && !(cqe->flags & IORING_CQE_F_MORE); bool const should_be_last = (cqe->res <= 0) || (args->stream && is_last) || early_last; int *this_recv; int orig_payload_size = cqe->res; if (should_be_last) { int used_res = cqe->res; if (!is_last) { fprintf(stderr, "not last cqe had error %d\n", i); goto cleanup; } switch (args->early_error) { case ERROR_NOT_ENOUGH_BUFFERS: if (cqe->res != -ENOBUFS) { fprintf(stderr, "ERROR_NOT_ENOUGH_BUFFERS: res %d\n", cqe->res); goto cleanup; } break; case ERROR_EARLY_OVERFLOW: if (cqe->res < 0) { fprintf(stderr, "ERROR_EARLY_OVERFLOW: res %d\n", cqe->res); goto cleanup; } break; case ERROR_EARLY_CLOSE_RECEIVER: if (cqe->res != -ECANCELED) { fprintf(stderr, "ERROR_EARLY_CLOSE_RECEIVER: res %d\n", cqe->res); goto cleanup; } break; case ERROR_NONE: case ERROR_EARLY_CLOSE_SENDER: if (args->recvmsg && (cqe->flags & IORING_CQE_F_BUFFER)) { void *buff = recv_buffs[cqe->flags >> 16]; struct io_uring_recvmsg_out *o = io_uring_recvmsg_validate(buff, cqe->res, &msg); if (!o) { fprintf(stderr, "invalid buff\n"); goto cleanup; } if (o->payloadlen != 0) { fprintf(stderr, "expected 0 payloadlen, got %u\n", o->payloadlen); goto cleanup; } used_res = 0; } else if (cqe->res != 0) { fprintf(stderr, "early error: res %d\n", cqe->res); goto cleanup; } break; case ERROR_EARLY_LAST: fprintf(stderr, "bad error_early\n"); goto cleanup; } if (cqe->res <= 0 && cqe->flags & IORING_CQE_F_BUFFER) { fprintf(stderr, "final BUFFER flag set\n"); goto cleanup; } if (cqe->flags & IORING_CQE_F_MORE) { fprintf(stderr, "final MORE flag set\n"); goto cleanup; } if (used_res <= 0) continue; } else { if (!(cqe->flags & IORING_CQE_F_MORE)) { fprintf(stderr, "MORE flag not set\n"); goto cleanup; } } if (!(cqe->flags & IORING_CQE_F_BUFFER)) { fprintf(stderr, "BUFFER flag not set\n"); goto cleanup; } this_recv = recv_buffs[cqe->flags >> 16]; if (args->recvmsg) { struct io_uring_recvmsg_out *o = io_uring_recvmsg_validate( this_recv, cqe->res, &msg); if (!o) { fprintf(stderr, "bad recvmsg\n"); goto cleanup; } 
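/*
 * A multishot recvmsg completion packs everything into the selected
 * buffer, which the io_uring_recvmsg_*() helpers below walk; roughly:
 *
 *	[ struct io_uring_recvmsg_out ][ name ][ control ][ payload ]
 *
 * io_uring_recvmsg_validate() has already checked that this layout fits
 * within cqe->res bytes; o->payloadlen records the full datagram size
 * even when the copied payload itself was truncated.
 */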
orig_payload_size = o->payloadlen; if (!args->stream) { struct cmsghdr *cmsg; if (o->namelen < sizeof(struct sockaddr_in)) { fprintf(stderr, "bad addr len %d", o->namelen); goto cleanup; } if (check_sockaddr((struct sockaddr_in *)io_uring_recvmsg_name(o))) goto cleanup; cmsg = io_uring_recvmsg_cmsg_firsthdr(o, &msg); if (!cmsg || cmsg->cmsg_level != IPPROTO_IP || cmsg->cmsg_type != IP_RECVORIGDSTADDR) { fprintf(stderr, "bad cmsg"); goto cleanup; } if (check_sockaddr((struct sockaddr_in *)CMSG_DATA(cmsg))) goto cleanup; cmsg = io_uring_recvmsg_cmsg_nexthdr(o, &msg, cmsg); if (cmsg) { fprintf(stderr, "unexpected extra cmsg\n"); goto cleanup; } } this_recv = (int *)io_uring_recvmsg_payload(o, &msg); cqe->res = io_uring_recvmsg_payload_length(o, cqe->res, &msg); if (o->payloadlen != cqe->res) { if (!(o->flags & MSG_TRUNC)) { fprintf(stderr, "expected truncated flag\n"); goto cleanup; } total_dropped_bytes += (o->payloadlen - cqe->res); } } total_recv_bytes += cqe->res; if (cqe->res % 4 != 0) { /* * doesn't seem to happen in practice, would need some * work to remove this requirement */ fprintf(stderr, "unexpectedly unaligned buffer cqe->res=%d\n", cqe->res); goto cleanup; } /* * for tcp: check the buffer arrived in order * for udp: pick the matching sent buffer by size, then validate the data */ if (!args->stream) { int sent_idx = orig_payload_size / sizeof(*at) - 1; if (sent_idx < 0 || sent_idx > N) { fprintf(stderr, "Bad sent idx: %d\n", sent_idx); goto cleanup; } at = sent_buffs[sent_idx]; } for (j = 0; j < cqe->res / 4; j++) { int sent = *at++; int recv = *this_recv++; if (sent != recv) { fprintf(stderr, "recv=%d sent=%d\n", recv, sent); goto cleanup; } } } if (args->early_error == ERROR_NONE && total_recv_bytes + total_dropped_bytes < total_sent_bytes) { fprintf(stderr, "missing recv: recv=%d dropped=%d sent=%d\n", total_recv_bytes, total_dropped_bytes, total_sent_bytes); goto cleanup; } ret = 0; cleanup: for (i = 0; i < ARRAY_SIZE(recv_buffs); i++) free(recv_buffs[i]); close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring); return ret; } static int test_enobuf(void) { struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqes[16]; char buffs[256]; int ret, i, fds[2]; if (t_create_ring(8, &ring, 0) != T_SETUP_OK) { fprintf(stderr, "ring create\n"); return -1; } ret = t_create_socket_pair(fds, false); if (ret) { fprintf(stderr, "t_create_socket_pair\n"); return ret; } sqe = io_uring_get_sqe(&ring); assert(sqe); /* deliberately only 2 provided buffers */ io_uring_prep_provide_buffers(sqe, &buffs[0], 1, 2, 0, 0); io_uring_sqe_set_data64(sqe, 0); sqe = io_uring_get_sqe(&ring); assert(sqe); io_uring_prep_recv_multishot(sqe, fds[0], NULL, 0, 0); io_uring_sqe_set_data64(sqe, 1); sqe->buf_group = 0; sqe->flags |= IOSQE_BUFFER_SELECT; ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "bad submit %d\n", ret); return -1; } for (i = 0; i < 3; i++) { do { ret = write(fds[1], "?", 1); } while (ret == -1 && errno == EINTR); } ret = io_uring_wait_cqes(&ring, &cqes[0], 4, NULL, NULL); if (ret) { fprintf(stderr, "wait cqes\n"); return ret; } ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 4); if (ret != 4) { fprintf(stderr, "peek batch cqes\n"); return -1; } /* provide buffers */ assert(cqes[0]->user_data == 0); assert(cqes[0]->res == 0); /* valid recv */ assert(cqes[1]->user_data == 1); assert(cqes[2]->user_data == 1); assert(cqes[1]->res == 1); assert(cqes[2]->res == 1); assert(cqes[1]->flags & (IORING_CQE_F_BUFFER | IORING_CQE_F_MORE)); assert(cqes[2]->flags &
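/* each valid recv should have consumed a provided buffer and left the multishot armed */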
(IORING_CQE_F_BUFFER | IORING_CQE_F_MORE)); /* missing buffer */ assert(cqes[3]->user_data == 1); assert(cqes[3]->res == -ENOBUFS); assert(!(cqes[3]->flags & (IORING_CQE_F_BUFFER | IORING_CQE_F_MORE))); close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { int ret; int loop; int early_error = 0; bool has_defer; if (argc > 1) return T_EXIT_SKIP; has_defer = t_probe_defer_taskrun(); for (loop = 0; loop < 16; loop++) { struct args a = { .stream = loop & 0x01, .wait_each = loop & 0x2, .recvmsg = loop & 0x04, .defer = loop & 0x08, }; if (a.defer && !has_defer) continue; for (early_error = 0; early_error < ERROR_EARLY_LAST; early_error++) { a.early_error = (enum early_error_t)early_error; ret = test(&a); if (ret) { if (ret == -ENORECVMULTISHOT) { if (loop == 0) return T_EXIT_SKIP; fprintf(stderr, "ENORECVMULTISHOT received but loop>0\n"); } fprintf(stderr, "test stream=%d wait_each=%d recvmsg=%d early_error=%d " " defer=%d failed\n", a.stream, a.wait_each, a.recvmsg, a.early_error, a.defer); return T_EXIT_FAIL; } } } ret = test_enobuf(); if (ret) { fprintf(stderr, "test_enobuf() failed: %d\n", ret); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/reg-fd-only.c000066400000000000000000000053121461424365000164500ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test io_uring_setup with IORING_SETUP_REGISTERED_FD_ONLY * */ #include #include "helpers.h" #define NORMAL_PAGE_ENTRIES 8 #define HUGE_PAGE_ENTRIES 512 static int no_mmap; static int test_nops(struct io_uring *ring, int sq_size, int nr_nops) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int i, ret; do { int todo = nr_nops; if (todo > sq_size) todo = sq_size; for (i = 0; i < todo; i++) { sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); } ret = io_uring_submit(ring); if (ret != todo) { fprintf(stderr, "short submit %d\n", ret); return T_EXIT_FAIL; } for (i = 0; i < todo; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait err %d\n", ret); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); } nr_nops -= todo; } while (nr_nops); return T_EXIT_PASS; } static int test(int nentries) { struct io_uring ring; unsigned values[2]; int ret; ret = io_uring_queue_init(nentries, &ring, IORING_SETUP_REGISTERED_FD_ONLY | IORING_SETUP_NO_MMAP); if (ret == -EINVAL) { no_mmap = 1; return T_EXIT_SKIP; } else if (ret == -ENOMEM) { fprintf(stdout, "Enable huge pages to test big rings\n"); return T_EXIT_SKIP; } else if (ret) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } ret = io_uring_register_ring_fd(&ring); if (ret != -EEXIST) { fprintf(stderr, "registering already-registered ring fd should fail\n"); goto err; } ret = io_uring_close_ring_fd(&ring); if (ret != -EBADF) { fprintf(stderr, "closing already-closed ring fd should fail\n"); goto err; } /* Test a simple io_uring_register operation expected to work. * io_uring_register_iowq_max_workers is arbitrary. 
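 * Passing {0, 0} acts as a pure query: the kernel reports the current
 * bounded and unbounded io-wq worker limits without changing them, so a
 * zero return with a nonzero value written back shows the register op
 * still reaches a ring that has no regular fd.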
*/ values[0] = values[1] = 0; ret = io_uring_register_iowq_max_workers(&ring, values); if (ret || (values[0] == 0 && values[1] == 0)) { fprintf(stderr, "io_uring_register operation failed after closing ring fd\n"); goto err; } ret = test_nops(&ring, nentries, nentries * 4); if (ret) goto err; io_uring_queue_exit(&ring); return T_EXIT_PASS; err: io_uring_queue_exit(&ring); return T_EXIT_FAIL; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; /* test single normal page */ ret = test(NORMAL_PAGE_ENTRIES); if (ret == T_EXIT_SKIP || no_mmap) { return T_EXIT_SKIP; } else if (ret != T_EXIT_PASS) { fprintf(stderr, "test 8 failed\n"); return T_EXIT_FAIL; } /* test with entries requiring a huge page */ ret = test(HUGE_PAGE_ENTRIES); if (ret == T_EXIT_SKIP) { return T_EXIT_SKIP; } else if (ret != T_EXIT_PASS) { fprintf(stderr, "test 512 failed\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/reg-hint.c000066400000000000000000000021541461424365000160430ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test alloc hint sanity after unregistering the file table */ #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; io_uring_queue_init(1, &ring, 0); ret = io_uring_register_files_sparse(&ring, 16); if (ret) { if (ret == -EINVAL) return T_EXIT_SKIP; fprintf(stderr, "Failed to register file table: %d\n", ret); return T_EXIT_FAIL; } io_uring_unregister_files(&ring); sqe = io_uring_get_sqe(&ring); io_uring_prep_socket_direct_alloc(sqe, AF_UNIX, SOCK_DGRAM, 0, 0); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait cqe: %d\n", ret); return T_EXIT_FAIL; } if (cqe->res != -ENFILE) { fprintf(stderr, "Bad CQE res: %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(&ring, cqe); return T_EXIT_PASS; } liburing-2.6/test/reg-reg-ring.c000066400000000000000000000041401461424365000166100ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test io_uring_register with a registered ring (IORING_REGISTER_USE_REGISTERED_RING) * */ #include #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring ring; unsigned values[2]; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return T_EXIT_FAIL; } if (!(ring.features & IORING_FEAT_REG_REG_RING)) { fprintf(stderr, "IORING_FEAT_REG_REG_RING not available in kernel\n"); io_uring_queue_exit(&ring); return T_EXIT_SKIP; } ret = io_uring_close_ring_fd(&ring); if (ret != -EINVAL) { fprintf(stderr, "closing ring fd should EINVAL before register\n"); goto err; } ret = io_uring_unregister_ring_fd(&ring); if (ret != -EINVAL) { fprintf(stderr, "unregistering not-registered ring fd should fail\n"); goto err; } ret = io_uring_register_ring_fd(&ring); if (ret != 1) { fprintf(stderr, "registering ring fd failed\n"); goto err; } ret = io_uring_register_ring_fd(&ring); if (ret != -EEXIST) { fprintf(stderr, "registering already-registered ring fd should fail\n"); goto err; } /* Test a simple io_uring_register operation expected to work. * io_uring_register_iowq_max_workers is arbitrary. 
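 * The {0, 0} arguments only query the current io-wq worker limits; the
 * point is the plumbing. With the ring fd registered above, liburing is
 * expected to issue this through the registered-fd path
 * (IORING_REGISTER_USE_REGISTERED_RING) advertised by
 * IORING_FEAT_REG_REG_RING.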
*/ values[0] = values[1] = 0; ret = io_uring_register_iowq_max_workers(&ring, values); if (ret || (values[0] == 0 && values[1] == 0)) { fprintf(stderr, "io_uring_register operation failed before closing ring fd\n"); goto err; } ret = io_uring_close_ring_fd(&ring); if (ret != 1) { fprintf(stderr, "closing ring fd failed\n"); goto err; } values[0] = values[1] = 0; ret = io_uring_register_iowq_max_workers(&ring, values); if (ret || (values[0] == 0 && values[1] == 0)) { fprintf(stderr, "io_uring_register operation failed after closing ring fd\n"); goto err; } ret = io_uring_close_ring_fd(&ring); if (ret != -EBADF) { fprintf(stderr, "closing already-closed ring fd should fail\n"); goto err; } io_uring_queue_exit(&ring); return T_EXIT_PASS; err: io_uring_queue_exit(&ring); return T_EXIT_FAIL; } liburing-2.6/test/regbuf-merge.c000066400000000000000000000057571461424365000167110ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ // autogenerated by syzkaller (https://github.com/google/syzkaller) #include #include #include #include #include #include #include #include #include #include "helpers.h" #ifndef __NR_io_uring_register #define __NR_io_uring_register 427 #endif #ifndef __NR_io_uring_setup #define __NR_io_uring_setup 425 #endif #define SIZEOF_IO_URING_SQE 64 #define SIZEOF_IO_URING_CQE 16 #define SQ_HEAD_OFFSET 0 #define SQ_TAIL_OFFSET 64 #define SQ_RING_MASK_OFFSET 256 #define SQ_RING_ENTRIES_OFFSET 264 #define SQ_FLAGS_OFFSET 276 #define SQ_DROPPED_OFFSET 272 #define CQ_HEAD_OFFSET 128 #define CQ_TAIL_OFFSET 192 #define CQ_RING_MASK_OFFSET 260 #define CQ_RING_ENTRIES_OFFSET 268 #define CQ_RING_OVERFLOW_OFFSET 284 #define CQ_FLAGS_OFFSET 280 #define CQ_CQES_OFFSET 320 static long syz_io_uring_setup(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5) { uint32_t entries = (uint32_t)a0; struct io_uring_params* setup_params = (struct io_uring_params*)a1; void* vma1 = (void*)a2; void* vma2 = (void*)a3; void** ring_ptr_out = (void**)a4; void** sqes_ptr_out = (void**)a5; uint32_t fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params); uint32_t sq_ring_sz = setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t); uint32_t cq_ring_sz = setup_params->cq_off.cqes + setup_params->cq_entries * SIZEOF_IO_URING_CQE; uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? 
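/* SQ and CQ share one mapping on kernels with IORING_FEAT_SINGLE_MMAP, so map once at the larger of the two sizes */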
sq_ring_sz : cq_ring_sz; *ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQ_RING); uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE; *sqes_ptr_out = mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES); return fd_io_uring; } static uint64_t r[1] = {0xffffffffffffffff}; int main(int argc, char *argv[]) { intptr_t res = 0; if (argc > 1) return T_EXIT_SKIP; mmap((void *) 0x1ffff000ul, 0x1000ul, PROT_NONE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0ul); mmap((void *) 0x20000000ul, 0x1000000ul, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0ul); mmap((void *) 0x21000000ul, 0x1000ul, PROT_NONE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0ul); *(uint32_t*)0x20000684 = 0; *(uint32_t*)0x20000688 = 0; *(uint32_t*)0x2000068c = 0; *(uint32_t*)0x20000690 = 0; *(uint32_t*)0x20000698 = -1; memset((void*)0x2000069c, 0, 12); res = syz_io_uring_setup(0x2fd6, 0x20000680, 0x20ffd000, 0x20ffc000, 0x20000700, 0x20000740); if (res != -1) r[0] = res; *(uint64_t*)0x20002840 = 0; *(uint64_t*)0x20002848 = 0; *(uint64_t*)0x20002850 = 0x20000840; *(uint64_t*)0x20002858 = 0x1000; syscall(__NR_io_uring_register, r[0], 0ul, 0x20002840ul, 2ul); return T_EXIT_PASS; } liburing-2.6/test/register-restrictions.c000066400000000000000000000337031461424365000207040ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test restrictions * */ #include #include #include #include #include #include #include #include #include "liburing.h" enum { TEST_OK, TEST_SKIPPED, TEST_FAILED }; static int test_restrictions_sqe_op(void) { struct io_uring_restriction res[2]; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret, pipe1[2]; uint64_t ptr; struct iovec vec = { .iov_base = &ptr, .iov_len = sizeof(ptr) }; if (pipe(pipe1) != 0) { perror("pipe"); return TEST_FAILED; } ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED); if (ret) { if (ret == -EINVAL) return TEST_SKIPPED; fprintf(stderr, "ring setup failed: %d\n", ret); return TEST_FAILED; } res[0].opcode = IORING_RESTRICTION_SQE_OP; res[0].sqe_op = IORING_OP_WRITEV; res[1].opcode = IORING_RESTRICTION_SQE_OP; res[1].sqe_op = IORING_OP_WRITE; ret = io_uring_register_restrictions(&ring, res, 2); if (ret) { if (ret == -EINVAL) return TEST_SKIPPED; fprintf(stderr, "failed to register restrictions: %d\n", ret); return TEST_FAILED; } ret = io_uring_enable_rings(&ring); if (ret) { fprintf(stderr, "ring enabling failed: %d\n", ret); return TEST_FAILED; } sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, pipe1[1], &vec, 1, 0); sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, pipe1[0], &vec, 1, 0); sqe->user_data = 2; ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "submit: %d\n", ret); return TEST_FAILED; } for (int i = 0; i < 2; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return TEST_FAILED; } switch (cqe->user_data) { case 1: /* writev */ if (cqe->res != sizeof(ptr)) { fprintf(stderr, "write res: %d\n", cqe->res); return TEST_FAILED; } break; case 2: /* readv should be denied */ if (cqe->res != -EACCES) { fprintf(stderr, "read res: %d\n", cqe->res); return TEST_FAILED; } break; } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); return TEST_OK; } static int test_restrictions_register_op(void) { struct io_uring_restriction res[1]; struct io_uring ring; int 
ret, pipe1[2]; uint64_t ptr; struct iovec vec = { .iov_base = &ptr, .iov_len = sizeof(ptr) }; if (pipe(pipe1) != 0) { perror("pipe"); return TEST_FAILED; } ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return TEST_FAILED; } res[0].opcode = IORING_RESTRICTION_REGISTER_OP; res[0].register_op = IORING_REGISTER_BUFFERS; ret = io_uring_register_restrictions(&ring, res, 1); if (ret) { if (ret == -EINVAL) return TEST_SKIPPED; fprintf(stderr, "failed to register restrictions: %d\n", ret); return TEST_FAILED; } ret = io_uring_enable_rings(&ring); if (ret) { fprintf(stderr, "ring enabling failed: %d\n", ret); return TEST_FAILED; } ret = io_uring_register_buffers(&ring, &vec, 1); if (ret) { fprintf(stderr, "io_uring_register_buffers failed: %d\n", ret); return TEST_FAILED; } ret = io_uring_register_files(&ring, pipe1, 2); if (ret != -EACCES) { fprintf(stderr, "io_uring_register_files ret: %d\n", ret); return TEST_FAILED; } io_uring_queue_exit(&ring); return TEST_OK; } static int test_restrictions_fixed_file(void) { struct io_uring_restriction res[4]; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret, pipe1[2]; uint64_t ptr; struct iovec vec = { .iov_base = &ptr, .iov_len = sizeof(ptr) }; if (pipe(pipe1) != 0) { perror("pipe"); return TEST_FAILED; } ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return TEST_FAILED; } res[0].opcode = IORING_RESTRICTION_SQE_OP; res[0].sqe_op = IORING_OP_WRITEV; res[1].opcode = IORING_RESTRICTION_SQE_OP; res[1].sqe_op = IORING_OP_READV; res[2].opcode = IORING_RESTRICTION_SQE_FLAGS_REQUIRED; res[2].sqe_flags = IOSQE_FIXED_FILE; res[3].opcode = IORING_RESTRICTION_REGISTER_OP; res[3].register_op = IORING_REGISTER_FILES; ret = io_uring_register_restrictions(&ring, res, 4); if (ret) { if (ret == -EINVAL) return TEST_SKIPPED; fprintf(stderr, "failed to register restrictions: %d\n", ret); return TEST_FAILED; } ret = io_uring_enable_rings(&ring); if (ret) { fprintf(stderr, "ring enabling failed: %d\n", ret); return TEST_FAILED; } ret = io_uring_register_files(&ring, pipe1, 2); if (ret) { fprintf(stderr, "io_uring_register_files ret: %d\n", ret); return TEST_FAILED; } sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, 1, &vec, 1, 0); io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, 0, &vec, 1, 0); io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); sqe->user_data = 2; sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, pipe1[1], &vec, 1, 0); sqe->user_data = 3; ret = io_uring_submit(&ring); if (ret != 3) { fprintf(stderr, "submit: %d\n", ret); return TEST_FAILED; } for (int i = 0; i < 3; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return TEST_FAILED; } switch (cqe->user_data) { case 1: /* writev */ if (cqe->res != sizeof(ptr)) { fprintf(stderr, "write res: %d\n", cqe->res); return TEST_FAILED; } break; case 2: /* readv */ if (cqe->res != sizeof(ptr)) { fprintf(stderr, "read res: %d\n", cqe->res); return TEST_FAILED; } break; case 3: /* writev without fixed_file should be denied */ if (cqe->res != -EACCES) { fprintf(stderr, "write res: %d\n", cqe->res); return TEST_FAILED; } break; } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); return TEST_OK; } static int test_restrictions_flags(void) { struct io_uring_restriction res[3]; struct io_uring_sqe *sqe; struct 
io_uring_cqe *cqe; struct io_uring ring; int ret, pipe1[2]; uint64_t ptr; struct iovec vec = { .iov_base = &ptr, .iov_len = sizeof(ptr) }; if (pipe(pipe1) != 0) { perror("pipe"); return TEST_FAILED; } ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return TEST_FAILED; } res[0].opcode = IORING_RESTRICTION_SQE_OP; res[0].sqe_op = IORING_OP_WRITEV; res[1].opcode = IORING_RESTRICTION_SQE_FLAGS_ALLOWED; res[1].sqe_flags = IOSQE_ASYNC | IOSQE_IO_LINK; res[2].opcode = IORING_RESTRICTION_SQE_FLAGS_REQUIRED; res[2].sqe_flags = IOSQE_FIXED_FILE; ret = io_uring_register_restrictions(&ring, res, 3); if (ret) { if (ret == -EINVAL) return TEST_SKIPPED; fprintf(stderr, "failed to register restrictions: %d\n", ret); return TEST_FAILED; } ret = io_uring_register_files(&ring, pipe1, 2); if (ret) { fprintf(stderr, "io_uring_register_files ret: %d\n", ret); return TEST_FAILED; } ret = io_uring_enable_rings(&ring); if (ret) { fprintf(stderr, "ring enabling failed: %d\n", ret); return TEST_FAILED; } sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, 1, &vec, 1, 0); io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE); sqe->user_data = 1; sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, 1, &vec, 1, 0); io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_ASYNC); sqe->user_data = 2; sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, 1, &vec, 1, 0); io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK); sqe->user_data = 3; ret = io_uring_submit(&ring); if (ret != 3) { fprintf(stderr, "submit: %d\n", ret); return TEST_FAILED; } sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, 1, &vec, 1, 0); io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_DRAIN); sqe->user_data = 4; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return TEST_FAILED; } sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, pipe1[1], &vec, 1, 0); io_uring_sqe_set_flags(sqe, IOSQE_IO_DRAIN); sqe->user_data = 5; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return TEST_FAILED; } sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, pipe1[1], &vec, 1, 0); io_uring_sqe_set_flags(sqe, IOSQE_ASYNC); sqe->user_data = 6; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return TEST_FAILED; } sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, pipe1[1], &vec, 1, 0); sqe->user_data = 7; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return TEST_FAILED; } for (int i = 0; i < 7; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return TEST_FAILED; } switch (cqe->user_data) { case 1: /* writev - flags = IOSQE_FIXED_FILE */ case 2: /* writev - flags = IOSQE_FIXED_FILE | IOSQE_ASYNC */ case 3: /* writev - flags = IOSQE_FIXED_FILE | IOSQE_IO_LINK */ if (cqe->res != sizeof(ptr)) { fprintf(stderr, "write res: %d user_data %" PRIu64 "\n", cqe->res, (uint64_t) cqe->user_data); return TEST_FAILED; } break; case 4: /* writev - flags = IOSQE_FIXED_FILE | IOSQE_IO_DRAIN */ case 5: /* writev - flags = IOSQE_IO_DRAIN */ case 6: /* writev - flags = IOSQE_ASYNC */ case 7: /* writev - flags = 0 */ if (cqe->res != -EACCES) { fprintf(stderr, "write res: %d user_data %" PRIu64 "\n", cqe->res, (uint64_t) cqe->user_data); return TEST_FAILED; } break; } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); return TEST_OK; } static int test_restrictions_empty(void) { struct 
io_uring_restriction res[0]; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret, pipe1[2]; uint64_t ptr; struct iovec vec = { .iov_base = &ptr, .iov_len = sizeof(ptr) }; if (pipe(pipe1) != 0) { perror("pipe"); return TEST_FAILED; } ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return TEST_FAILED; } ret = io_uring_register_restrictions(&ring, res, 0); if (ret) { if (ret == -EINVAL) return TEST_SKIPPED; fprintf(stderr, "failed to register restrictions: %d\n", ret); return TEST_FAILED; } ret = io_uring_enable_rings(&ring); if (ret) { fprintf(stderr, "ring enabling failed: %d\n", ret); return TEST_FAILED; } ret = io_uring_register_buffers(&ring, &vec, 1); if (ret != -EACCES) { fprintf(stderr, "io_uring_register_buffers ret: %d\n", ret); return TEST_FAILED; } ret = io_uring_register_files(&ring, pipe1, 2); if (ret != -EACCES) { fprintf(stderr, "io_uring_register_files ret: %d\n", ret); return TEST_FAILED; } sqe = io_uring_get_sqe(&ring); io_uring_prep_writev(sqe, pipe1[1], &vec, 1, 0); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return TEST_FAILED; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait: %d\n", ret); return TEST_FAILED; } if (cqe->res != -EACCES) { fprintf(stderr, "write res: %d\n", cqe->res); return TEST_FAILED; } io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return TEST_OK; } static int test_restrictions_rings_not_disabled(void) { struct io_uring_restriction res[1]; struct io_uring ring; int ret; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return TEST_FAILED; } res[0].opcode = IORING_RESTRICTION_SQE_OP; res[0].sqe_op = IORING_OP_WRITEV; ret = io_uring_register_restrictions(&ring, res, 1); if (ret != -EBADFD) { fprintf(stderr, "io_uring_register_restrictions ret: %d\n", ret); return TEST_FAILED; } io_uring_queue_exit(&ring); return TEST_OK; } static int test_restrictions_rings_disabled(void) { struct io_uring_sqe *sqe; struct io_uring ring; int ret; ret = io_uring_queue_init(8, &ring, IORING_SETUP_R_DISABLED); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return TEST_FAILED; } sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); ret = io_uring_submit(&ring); if (ret != -EBADFD) { fprintf(stderr, "submit: %d\n", ret); return TEST_FAILED; } io_uring_queue_exit(&ring); return TEST_OK; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test_restrictions_sqe_op(); if (ret == TEST_SKIPPED) { printf("test_restrictions_sqe_op: skipped\n"); return 0; } else if (ret == TEST_FAILED) { fprintf(stderr, "test_restrictions_sqe_op failed\n"); return ret; } ret = test_restrictions_register_op(); if (ret == TEST_SKIPPED) { printf("test_restrictions_register_op: skipped\n"); } else if (ret == TEST_FAILED) { fprintf(stderr, "test_restrictions_register_op failed\n"); return ret; } ret = test_restrictions_fixed_file(); if (ret == TEST_SKIPPED) { printf("test_restrictions_fixed_file: skipped\n"); } else if (ret == TEST_FAILED) { fprintf(stderr, "test_restrictions_fixed_file failed\n"); return ret; } ret = test_restrictions_flags(); if (ret == TEST_SKIPPED) { printf("test_restrictions_flags: skipped\n"); } else if (ret == TEST_FAILED) { fprintf(stderr, "test_restrictions_flags failed\n"); return ret; } ret = test_restrictions_empty(); if (ret == TEST_SKIPPED) { printf("test_restrictions_empty: skipped\n"); } else if (ret == 
TEST_FAILED) { fprintf(stderr, "test_restrictions_empty failed\n"); return ret; } ret = test_restrictions_rings_not_disabled(); if (ret == TEST_SKIPPED) { printf("test_restrictions_rings_not_disabled: skipped\n"); } else if (ret == TEST_FAILED) { fprintf(stderr, "test_restrictions_rings_not_disabled failed\n"); return ret; } ret = test_restrictions_rings_disabled(); if (ret == TEST_SKIPPED) { printf("test_restrictions_rings_disabled: skipped\n"); } else if (ret == TEST_FAILED) { fprintf(stderr, "test_restrictions_rings_disabled failed\n"); return ret; } return 0; } liburing-2.6/test/rename.c000066400000000000000000000042401461424365000155730ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various rename tests * */ #include #include #include #include #include #include #include #include "liburing.h" static int test_rename(struct io_uring *ring, const char *old, const char *new) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } memset(sqe, 0, sizeof(*sqe)); io_uring_prep_rename(sqe, old, new); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; err: return 1; } static int stat_file(const char *buf) { struct stat sb; if (!stat(buf, &sb)) return 0; return errno; } int main(int argc, char *argv[]) { struct io_uring ring; char src[32] = "./XXXXXX"; char dst[32] = "./XXXXXX"; int ret; if (argc > 1) return 0; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } ret = mkstemp(src); if (ret < 0) { perror("mkstemp"); return 1; } close(ret); ret = mkstemp(dst); if (ret < 0) { perror("mkstemp"); return 1; } close(ret); if (stat_file(src) != 0) { perror("stat"); return 1; } if (stat_file(dst) != 0) { perror("stat"); return 1; } ret = test_rename(&ring, src, dst); if (ret < 0) { if (ret == -EBADF || ret == -EINVAL) { fprintf(stdout, "Rename not supported, skipping\n"); goto out; } fprintf(stderr, "rename: %s\n", strerror(-ret)); goto err; } else if (ret) goto err; if (stat_file(src) != ENOENT) { fprintf(stderr, "stat got %s\n", strerror(ret)); return 1; } if (stat_file(dst) != 0) { perror("stat"); return 1; } ret = test_rename(&ring, "/x/y/1/2", "/2/1/y/x"); if (ret != -ENOENT) { fprintf(stderr, "test_rename invalid failed: %d\n", ret); return ret; } out: unlink(dst); return 0; err: unlink(src); unlink(dst); return 1; } liburing-2.6/test/ring-leak.c000066400000000000000000000133031461424365000161750ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Based on description from Al Viro - this demonstrates a leak of the * io_uring instance, by sending the io_uring fd over a UNIX socket. 
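 * The cycle being demonstrated: a ring holds references to its
 * registered files, and an SCM_RIGHTS message sitting in a socket's
 * receive queue holds a reference to the ring fd it carries. When the
 * registered files include that same socket pair, refcounting alone can
 * never free either side; only the UNIX GC can break the loop.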
* * See: * * https://lore.kernel.org/linux-block/20190129192702.3605-1-axboe@kernel.dk/T/#m6c87fc64e4d063786af6ec6fadce3ac1e95d3184 * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #include "../src/syscall.h" static int __io_uring_register_files(int ring_fd, int fd1, int fd2) { __s32 fds[2] = { fd1, fd2 }; return __sys_io_uring_register(ring_fd, IORING_REGISTER_FILES, fds, 2); } static int get_ring_fd(void) { struct io_uring_params p; int fd; memset(&p, 0, sizeof(p)); fd = __sys_io_uring_setup(2, &p); if (fd < 0) { perror("io_uring_setup"); return -1; } return fd; } static int send_fd(int socket, int fd) { char buf[CMSG_SPACE(sizeof(fd))]; struct cmsghdr *cmsg; struct msghdr msg; memset(buf, 0, sizeof(buf)); memset(&msg, 0, sizeof(msg)); msg.msg_control = buf; msg.msg_controllen = sizeof(buf); cmsg = CMSG_FIRSTHDR(&msg); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); memmove(CMSG_DATA(cmsg), &fd, sizeof(fd)); msg.msg_controllen = CMSG_SPACE(sizeof(fd)); if (sendmsg(socket, &msg, 0) < 0) { if (errno == EINVAL) return T_EXIT_SKIP; perror("sendmsg"); return T_EXIT_FAIL; } return T_EXIT_PASS; } static int test_iowq_request_cancel(void) { char buffer[128]; struct io_uring ring; struct io_uring_sqe *sqe; int ret, fds[2]; ret = io_uring_queue_init(8, &ring, 0); if (ret < 0) { fprintf(stderr, "failed to init io_uring: %s\n", strerror(-ret)); return ret; } if (pipe(fds)) { perror("pipe"); return -1; } ret = io_uring_register_files(&ring, fds, 2); if (ret) { fprintf(stderr, "file_register: %d\n", ret); return ret; } close(fds[1]); sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); return 1; } /* potentially sitting in internal polling */ io_uring_prep_read(sqe, 0, buffer, 10, 0); sqe->flags |= IOSQE_FIXED_FILE; sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "%s: failed to get sqe\n", __FUNCTION__); return 1; } /* staying in io-wq */ io_uring_prep_read(sqe, 0, buffer, 10, 0); sqe->flags |= IOSQE_FIXED_FILE | IOSQE_ASYNC; ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } /* should unregister files and close the write fd */ io_uring_queue_exit(&ring); /* * We're trying to wait for the ring to "really" exit, that will be * done async. For that rely on the registered write end to be closed * after ring quiesce, so failing read from the other pipe end. 
*/ ret = read(fds[0], buffer, 10); if (ret < 0) perror("read"); close(fds[0]); return 0; } static void trigger_unix_gc(void) { int fd; fd = socket(AF_UNIX, SOCK_DGRAM, 0); if (fd < 0) perror("socket dgram"); else close(fd); } static int test_scm_cycles(bool update) { char buffer[128]; struct io_uring ring; int i, ret; int sp[2], fds[2], reg_fds[4]; if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sp) != 0) { perror("Failed to create Unix-domain socket pair\n"); return 1; } ret = io_uring_queue_init(8, &ring, 0); if (ret < 0) { fprintf(stderr, "failed to init io_uring: %s\n", strerror(-ret)); return ret; } if (pipe(fds)) { perror("pipe"); return -1; } ret = send_fd(sp[0], ring.ring_fd); if (ret != T_EXIT_PASS) return ret; /* register an empty set for updates */ if (update) { for (i = 0; i < 4; i++) reg_fds[i] = -1; ret = io_uring_register_files(&ring, reg_fds, 4); if (ret) { fprintf(stderr, "file_register: %d\n", ret); return ret; } } reg_fds[0] = fds[0]; reg_fds[1] = fds[1]; reg_fds[2] = sp[0]; reg_fds[3] = sp[1]; if (update) { ret = io_uring_register_files_update(&ring, 0, reg_fds, 4); if (ret != 4) { fprintf(stderr, "file_register: %d\n", ret); return ret; } } else { ret = io_uring_register_files(&ring, reg_fds, 4); if (ret) { fprintf(stderr, "file_register: %d\n", ret); return ret; } } close(fds[1]); close(sp[0]); close(sp[1]); /* should unregister files and close the write fd */ io_uring_queue_exit(&ring); trigger_unix_gc(); /* * We're trying to wait for the ring to "really" exit, that will be * done async. For that rely on the registered write end to be closed * after ring quiesce, so failing read from the other pipe end. */ ret = read(fds[0], buffer, 10); if (ret < 0) perror("read"); close(fds[0]); return 0; } int main(int argc, char *argv[]) { int sp[2], pid, ring_fd, ret; int i; if (argc > 1) return 0; ret = test_iowq_request_cancel(); if (ret) { fprintf(stderr, "test_iowq_request_cancel() failed\n"); return 1; } for (i = 0; i < 2; i++) { bool update = !!(i & 1); ret = test_scm_cycles(update); if (ret == T_EXIT_SKIP) return T_EXIT_SKIP; if (ret) { fprintf(stderr, "test_scm_cycles() failed %i\n", update); return 1; } } if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sp) != 0) { perror("Failed to create Unix-domain socket pair\n"); return 1; } ring_fd = get_ring_fd(); if (ring_fd < 0) return 1; ret = __io_uring_register_files(ring_fd, sp[0], sp[1]); if (ret < 0) { perror("register files"); return 1; } pid = fork(); if (pid) { ret = send_fd(sp[0], ring_fd); if (ret != T_EXIT_PASS) return ret; } close(ring_fd); close(sp[0]); close(sp[1]); return 0; } liburing-2.6/test/ring-leak2.c000066400000000000000000000121631461424365000162620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Test two ring deadlock. A buggy kernel will end up * having io_wq_* workers pending, as the circular reference * will prevent full exit. 
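 * The one-second alarm below doubles as the verdict: on a fixed kernel
 * the process simply exits when SIGALRM fires, while a buggy kernel
 * leaves io_wq workers behind (or hangs) instead of tearing down.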
* * Based on a test case from Josef * */ #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "../src/syscall.h" enum { ACCEPT, READ, WRITE, POLLING_IN, POLLING_RDHUP, CLOSE, EVENTFD_READ, }; typedef struct conn_info { __u32 fd; __u16 type; __u16 bid; } conn_info; static char read_eventfd_buffer[8]; static pthread_mutex_t lock; static struct io_uring *client_ring; static int client_eventfd = -1; static int setup_io_uring(struct io_uring *ring) { struct io_uring_params p = { }; int ret; ret = io_uring_queue_init_params(8, ring, &p); if (ret) { fprintf(stderr, "Unable to setup io_uring: %s\n", strerror(-ret)); return 1; } return 0; } static void add_socket_eventfd_read(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; conn_info conn_i = { .fd = fd, .type = EVENTFD_READ, }; sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, fd, &read_eventfd_buffer, 8, 0); io_uring_sqe_set_flags(sqe, IOSQE_ASYNC); memcpy(&sqe->user_data, &conn_i, sizeof(conn_i)); } static void add_socket_pollin(struct io_uring *ring, int fd) { struct io_uring_sqe *sqe; conn_info conn_i = { .fd = fd, .type = POLLING_IN, }; sqe = io_uring_get_sqe(ring); io_uring_prep_poll_add(sqe, fd, POLL_IN); memcpy(&sqe->user_data, &conn_i, sizeof(conn_i)); } static void *server_thread(void *arg) { struct sockaddr_in serv_addr; int port = 0; int sock_listen_fd, evfd; const int val = 1; struct io_uring ring; sock_listen_fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); setsockopt(sock_listen_fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); memset(&serv_addr, 0, sizeof(serv_addr)); serv_addr.sin_family = AF_INET; serv_addr.sin_port = htons(port); serv_addr.sin_addr.s_addr = INADDR_ANY; evfd = eventfd(0, EFD_CLOEXEC); // bind and listen if (bind(sock_listen_fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) { perror("Error binding socket...\n"); exit(1); } if (listen(sock_listen_fd, 1) < 0) { perror("Error listening on socket...\n"); exit(1); } setup_io_uring(&ring); add_socket_eventfd_read(&ring, evfd); add_socket_pollin(&ring, sock_listen_fd); while (1) { struct io_uring_cqe *cqe; unsigned head; unsigned count = 0; io_uring_submit_and_wait(&ring, 1); io_uring_for_each_cqe(&ring, head, cqe) { struct conn_info conn_i; count++; memcpy(&conn_i, &cqe->user_data, sizeof(conn_i)); if (conn_i.type == ACCEPT) { int sock_conn_fd = cqe->res; // only read when there is no error, >= 0 if (sock_conn_fd > 0) { add_socket_pollin(&ring, sock_listen_fd); pthread_mutex_lock(&lock); io_uring_submit(client_ring); pthread_mutex_unlock(&lock); } } else if (conn_i.type == POLLING_IN) { break; } } io_uring_cq_advance(&ring, count); } } static void *client_thread(void *arg) { struct io_uring ring; int ret; setup_io_uring(&ring); client_ring = ˚ client_eventfd = eventfd(0, EFD_CLOEXEC); pthread_mutex_lock(&lock); add_socket_eventfd_read(&ring, client_eventfd); pthread_mutex_unlock(&lock); while (1) { struct io_uring_cqe *cqe; unsigned head; unsigned count = 0; pthread_mutex_lock(&lock); io_uring_submit(&ring); pthread_mutex_unlock(&lock); ret = __sys_io_uring_enter(ring.ring_fd, 0, 1, IORING_ENTER_GETEVENTS, NULL); if (ret < 0) { perror("Error io_uring_enter...\n"); exit(1); } // go through all CQEs io_uring_for_each_cqe(&ring, head, cqe) { struct conn_info conn_i; int type; count++; memcpy(&conn_i, &cqe->user_data, sizeof(conn_i)); type = conn_i.type; if (type == READ) { pthread_mutex_lock(&lock); if (cqe->res <= 0) { // connection closed or error 
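// note: the mutex is still held here; the server thread submits into this
// ring under the same lock, so teardown can't race those submissions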
shutdown(conn_i.fd, SHUT_RDWR); } else { pthread_mutex_unlock(&lock); break; } add_socket_pollin(&ring, conn_i.fd); pthread_mutex_unlock(&lock); } else if (type == WRITE) { } else if (type == POLLING_IN) { break; } else if (type == POLLING_RDHUP) { break; } else if (type == CLOSE) { } else if (type == EVENTFD_READ) { add_socket_eventfd_read(&ring, client_eventfd); } } io_uring_cq_advance(&ring, count); } } static void sig_alrm(int sig) { exit(0); } int main(int argc, char *argv[]) { pthread_t server_thread_t, client_thread_t; struct sigaction act; if (argc > 1) return 0; if (pthread_mutex_init(&lock, NULL) != 0) { printf("\n mutex init failed\n"); return 1; } pthread_create(&server_thread_t, NULL, &server_thread, NULL); pthread_create(&client_thread_t, NULL, &client_thread, NULL); memset(&act, 0, sizeof(act)); act.sa_handler = sig_alrm; act.sa_flags = SA_RESTART; sigaction(SIGALRM, &act, NULL); alarm(1); pthread_join(server_thread_t, NULL); return 0; } liburing-2.6/test/ringbuf-read.c000066400000000000000000000072251461424365000166770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: ring mapped provided buffers with reads * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define BUF_SIZE 4096 #define NR_BUFS 64 #define FSIZE (BUF_SIZE * NR_BUFS) #define BR_MASK (NR_BUFS - 1) static int no_buf_ring; static int verify_buffer(char *buf, char val) { int i; for (i = 0; i < BUF_SIZE; i++) { if (buf[i] != val) { fprintf(stderr, "got %d, wanted %d\n", buf[i], val); return 1; } } return 0; } static int test(const char *filename, int dio, int async) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; struct io_uring_buf_ring *br; int ret, fd, i; char *buf; void *ptr; ret = io_uring_queue_init(NR_BUFS, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } if (dio) { fd = open(filename, O_DIRECT | O_RDONLY); if (fd < 0 && errno == EINVAL) return T_EXIT_SKIP; } else { fd = open(filename, O_RDONLY); } if (fd < 0) { perror("open"); return 1; } posix_fadvise(fd, 0, FSIZE, POSIX_FADV_DONTNEED); if (posix_memalign((void **) &buf, 4096, FSIZE)) return 1; br = io_uring_setup_buf_ring(&ring, NR_BUFS, 1, 0, &ret); if (!br) { if (ret == -EINVAL) { no_buf_ring = 1; return 0; } fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } ptr = buf; for (i = 0; i < NR_BUFS; i++) { io_uring_buf_ring_add(br, ptr, BUF_SIZE, i + 1, BR_MASK, i); ptr += BUF_SIZE; } io_uring_buf_ring_advance(br, NR_BUFS); for (i = 0; i < NR_BUFS; i++) { sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fd, NULL, BUF_SIZE, i * BUF_SIZE); sqe->buf_group = 1; sqe->flags |= IOSQE_BUFFER_SELECT; if (async && !(i & 1)) sqe->flags |= IOSQE_ASYNC; sqe->user_data = i + 1; } ret = io_uring_submit(&ring); if (ret != NR_BUFS) { fprintf(stderr, "submit: %d\n", ret); return 1; } for (i = 0; i < NR_BUFS; i++) { int bid, ud; ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait cqe failed %d\n", ret); return 1; } if (cqe->res != BUF_SIZE) { fprintf(stderr, "cqe res %d\n", cqe->res); return 1; } if (!(cqe->flags & IORING_CQE_F_BUFFER)) { fprintf(stderr, "no buffer selected\n"); return 1; } bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT; ud = cqe->user_data; io_uring_cqe_seen(&ring, cqe); if (verify_buffer(buf + ((bid - 1) * BUF_SIZE), ud)) return 1; } return 0; } int main(int argc, char *argv[]) { char buf[BUF_SIZE]; char fname[80]; int ret, fd, i, do_unlink; if (argc > 1) { strcpy(fname, 
argv[1]); do_unlink = 0; } else { sprintf(fname, ".ringbuf-read.%d", getpid()); t_create_file(fname, FSIZE); do_unlink = 1; } fd = open(fname, O_WRONLY); if (fd < 0) { perror("open"); goto err; } for (i = 0; i < NR_BUFS; i++) { memset(buf, i + 1, BUF_SIZE); ret = write(fd, buf, BUF_SIZE); if (ret != BUF_SIZE) { fprintf(stderr, "bad file prep write\n"); close(fd); goto err; } } close(fd); ret = test(fname, 1, 0); if (ret == T_EXIT_FAIL) { fprintf(stderr, "dio test failed\n"); goto err; } if (no_buf_ring) goto pass; ret = test(fname, 0, 0); if (ret) { fprintf(stderr, "buffered test failed\n"); goto err; } ret = test(fname, 1, 1); if (ret == T_EXIT_FAIL) { fprintf(stderr, "dio async test failed\n"); goto err; } ret = test(fname, 0, 1); if (ret == T_EXIT_FAIL) { fprintf(stderr, "buffered async test failed\n"); goto err; } pass: ret = T_EXIT_PASS; goto out; err: ret = T_EXIT_FAIL; out: if (do_unlink) unlink(fname); return ret; } liburing-2.6/test/ringbuf-status.c000066400000000000000000000122531461424365000173040ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test reading provided ring buf head * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define BUF_SIZE 32 #define NR_BUFS 8 #define FSIZE (BUF_SIZE * NR_BUFS) #define BR_MASK (NR_BUFS - 1) #define BGID 1 static int no_buf_ring; static int no_buf_ring_status; static int test_max(void) { struct io_uring_buf_ring *br; struct io_uring ring; int nr_bufs = 32768; int ret, i; char *buf; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } if (posix_memalign((void **) &buf, 4096, FSIZE)) return 1; br = io_uring_setup_buf_ring(&ring, nr_bufs, BGID, 0, &ret); if (!br) { fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } ret = io_uring_buf_ring_available(&ring, br, BGID); if (ret) { fprintf(stderr, "Bad available count %d\n", ret); return 1; } for (i = 0; i < nr_bufs / 2; i++) io_uring_buf_ring_add(br, buf, BUF_SIZE, i + 1, nr_bufs - 1, i); io_uring_buf_ring_advance(br, nr_bufs / 2); ret = io_uring_buf_ring_available(&ring, br, BGID); if (ret != nr_bufs / 2) { fprintf(stderr, "Bad half full available count %d\n", ret); return 1; } for (i = 0; i < nr_bufs / 2; i++) io_uring_buf_ring_add(br, buf, BUF_SIZE, i + 1, nr_bufs - 1, i); io_uring_buf_ring_advance(br, nr_bufs / 2); ret = io_uring_buf_ring_available(&ring, br, BGID); if (ret != nr_bufs) { fprintf(stderr, "Bad half full available count %d\n", ret); return 1; } free(buf); io_uring_queue_exit(&ring); return T_EXIT_PASS; } static int test(int invalid) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; struct io_uring_buf_ring *br; int ret, i, fds[2]; uint16_t head; char *buf; void *ptr; char output[16]; memset(output, 0x55, sizeof(output)); ret = io_uring_queue_init(NR_BUFS, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } if (pipe(fds) < 0) { perror("pipe"); return T_EXIT_FAIL; } if (posix_memalign((void **) &buf, 4096, FSIZE)) return 1; br = io_uring_setup_buf_ring(&ring, NR_BUFS, BGID, 0, &ret); if (!br) { if (ret == -EINVAL) { no_buf_ring = 1; return 0; } fprintf(stderr, "Buffer ring register failed %d\n", ret); return 1; } ptr = buf; for (i = 0; i < NR_BUFS; i++) { io_uring_buf_ring_add(br, ptr, BUF_SIZE, i + 1, BR_MASK, i); ptr += BUF_SIZE; } io_uring_buf_ring_advance(br, NR_BUFS); /* head should be zero at this point */ head = 1; if (!invalid) ret = 
io_uring_buf_ring_head(&ring, BGID, &head); else ret = io_uring_buf_ring_head(&ring, BGID + 10, &head); if (ret) { if (ret == -EINVAL) { no_buf_ring_status = 1; return T_EXIT_SKIP; } if (invalid && ret == -ENOENT) return T_EXIT_PASS; fprintf(stderr, "buf_ring_head: %d\n", ret); return T_EXIT_FAIL; } if (invalid) { fprintf(stderr, "lookup of bad group id succeeded\n"); return T_EXIT_FAIL; } if (head != 0) { fprintf(stderr, "bad head %d\n", head); return T_EXIT_FAIL; } ret = io_uring_buf_ring_available(&ring, br, BGID); if (ret != NR_BUFS) { fprintf(stderr, "ring available %d\n", ret); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fds[0], NULL, BUF_SIZE, i * BUF_SIZE); sqe->buf_group = BGID; sqe->flags |= IOSQE_BUFFER_SELECT; sqe->user_data = 1; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit: %d\n", ret); return T_EXIT_FAIL; } /* head should still be zero at this point, no buffers consumed */ head = 1; ret = io_uring_buf_ring_head(&ring, BGID, &head); if (head != 0) { fprintf(stderr, "bad head after submit %d\n", head); return T_EXIT_FAIL; } ret = write(fds[1], output, sizeof(output)); if (ret != sizeof(output)) { fprintf(stderr, "pipe buffer write %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait cqe failed %d\n", ret); return T_EXIT_FAIL; } if (cqe->res != sizeof(output)) { fprintf(stderr, "cqe res %d\n", cqe->res); return T_EXIT_FAIL; } if (!(cqe->flags & IORING_CQE_F_BUFFER)) { fprintf(stderr, "no buffer selected\n"); return T_EXIT_FAIL; } io_uring_cqe_seen(&ring, cqe); /* head should now be one, we consumed a buffer */ ret = io_uring_buf_ring_head(&ring, BGID, &head); if (head != 1) { fprintf(stderr, "bad head after cqe %d\n", head); return T_EXIT_FAIL; } ret = io_uring_buf_ring_available(&ring, br, BGID); if (ret != NR_BUFS - 1) { fprintf(stderr, "ring available %d\n", ret); return T_EXIT_FAIL; } close(fds[0]); close(fds[1]); free(buf); io_uring_queue_exit(&ring); return T_EXIT_PASS; } int main(int argc, char *argv[]) { int ret; ret = test(0); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test 0 failed\n"); return T_EXIT_FAIL; } if (no_buf_ring || no_buf_ring_status) return T_EXIT_SKIP; ret = test(1); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test 1 failed\n"); return T_EXIT_FAIL; } ret = test_max(); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_max failed\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/rsrc_tags.c000066400000000000000000000245431461424365000163230ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various file registration tests * */ #include #include #include #include #include #include #include #include "../src/syscall.h" #include "helpers.h" #include "liburing.h" static int pipes[2]; enum { TEST_IORING_RSRC_FILE = 0, TEST_IORING_RSRC_BUFFER = 1, }; static bool check_cq_empty(struct io_uring *ring) { struct io_uring_cqe *cqe = NULL; int ret; usleep(1000); /* doesn't happen immediately, so wait */ ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */ return ret == -EAGAIN; } /* * There are io_uring_register_buffers_tags() and other wrappers, * but they may change, so hand-code to specifically test this ABI. 
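 *
 * For reference, a minimal sketch of the same file registration going
 * through the liburing wrapper (which this test deliberately bypasses):
 *
 *	__u64 tags[2] = { 1, 2 };
 *	int fds[2] = { pipes[0], pipes[1] };
 *
 *	ret = io_uring_register_files_tags(&ring, fds, tags, 2);
 *
 * The raw variant below instead fills struct io_uring_rsrc_register by
 * hand and issues IORING_REGISTER_FILES2 / IORING_REGISTER_BUFFERS2
 * directly.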
*/ static int register_rsrc(struct io_uring *ring, int type, int nr, const void *arg, const __u64 *tags) { struct io_uring_rsrc_register reg; int reg_type; memset(®, 0, sizeof(reg)); reg.nr = nr; reg.data = (__u64)(uintptr_t)arg; reg.tags = (__u64)(uintptr_t)tags; reg_type = IORING_REGISTER_FILES2; if (type != TEST_IORING_RSRC_FILE) reg_type = IORING_REGISTER_BUFFERS2; return __sys_io_uring_register(ring->ring_fd, reg_type, ®, sizeof(reg)); } /* * There are io_uring_register_buffers_update_tag() and other wrappers, * but they may change, so hand-code to specifically test this ABI. */ static int update_rsrc(struct io_uring *ring, int type, int nr, int off, const void *arg, const __u64 *tags) { struct io_uring_rsrc_update2 up; int up_type; memset(&up, 0, sizeof(up)); up.offset = off; up.data = (__u64)(uintptr_t)arg; up.tags = (__u64)(uintptr_t)tags; up.nr = nr; up_type = IORING_REGISTER_FILES_UPDATE2; if (type != TEST_IORING_RSRC_FILE) up_type = IORING_REGISTER_BUFFERS_UPDATE; return __sys_io_uring_register(ring->ring_fd, up_type, &up, sizeof(up)); } static bool has_rsrc_update(void) { struct io_uring ring; int ret; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "io_uring_queue_init() failed, %d\n", ret); exit(1); } ret = ring.features & IORING_FEAT_RSRC_TAGS; io_uring_queue_exit(&ring); return ret; } static int test_tags_generic(int nr, int type, void *rsrc, int ring_flags) { struct io_uring_cqe *cqe = NULL; struct io_uring ring; int i, ret; __u64 *tags; tags = malloc(nr * sizeof(*tags)); if (!tags) return 1; for (i = 0; i < nr; i++) tags[i] = i + 1; ret = io_uring_queue_init(1, &ring, 0); if (ret) { printf("ring setup failed\n"); return 1; } ret = register_rsrc(&ring, type, nr, rsrc, tags); if (ret) { fprintf(stderr, "rsrc register failed %i\n", ret); return 1; } /* test that tags are set */ tags[0] = 666; ret = update_rsrc(&ring, type, 1, 0, rsrc, &tags[0]); assert(ret == 1); ret = io_uring_wait_cqe(&ring, &cqe); assert(!ret && cqe->user_data == 1); io_uring_cqe_seen(&ring, cqe); /* test that tags are updated */ tags[0] = 0; ret = update_rsrc(&ring, type, 1, 0, rsrc, &tags[0]); assert(ret == 1); ret = io_uring_wait_cqe(&ring, &cqe); assert(!ret && cqe->user_data == 666); io_uring_cqe_seen(&ring, cqe); /* test tag=0 doesn't emit CQE */ tags[0] = 1; ret = update_rsrc(&ring, type, 1, 0, rsrc, &tags[0]); assert(ret == 1); assert(check_cq_empty(&ring)); free(tags); io_uring_queue_exit(&ring); return 0; } static int test_buffers_update(void) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe = NULL; struct io_uring ring; const int nr = 5; int buf_idx = 1, i, ret; int pipes[2]; char tmp_buf[1024]; char tmp_buf2[1024]; struct iovec vecs[nr]; __u64 tags[nr]; for (i = 0; i < nr; i++) { vecs[i].iov_base = tmp_buf; vecs[i].iov_len = 1024; tags[i] = i + 1; } ret = test_tags_generic(nr, TEST_IORING_RSRC_BUFFER, vecs, 0); if (ret) return 1; ret = io_uring_queue_init(1, &ring, 0); if (ret) { printf("ring setup failed\n"); return 1; } if (pipe(pipes) < 0) { perror("pipe"); return 1; } ret = register_rsrc(&ring, TEST_IORING_RSRC_BUFFER, nr, vecs, tags); if (ret) { fprintf(stderr, "rsrc register failed %i\n", ret); return 1; } /* test that CQE is not emitted before we're done with a buffer */ sqe = io_uring_get_sqe(&ring); io_uring_prep_read_fixed(sqe, pipes[0], tmp_buf, 10, 0, 0); sqe->user_data = 100; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } ret = io_uring_peek_cqe(&ring, &cqe); assert(ret == -EAGAIN); 
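	/*
	 * The fixed-buffer read above is still in flight and pins the current
	 * rsrc node, so the update below must not fire the replaced buffer's
	 * tag CQE yet; that CQE is only expected once the pipe is closed and
	 * the read completes, which is verified further down.
	 */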
vecs[buf_idx].iov_base = tmp_buf2; ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, buf_idx, &vecs[buf_idx], &tags[buf_idx]); if (ret != 1) { fprintf(stderr, "rsrc update failed %i %i\n", ret, errno); return 1; } ret = io_uring_peek_cqe(&ring, &cqe); /* nothing should be there */ assert(ret == -EAGAIN); close(pipes[0]); close(pipes[1]); ret = io_uring_wait_cqe(&ring, &cqe); assert(!ret && cqe->user_data == 100); io_uring_cqe_seen(&ring, cqe); ret = io_uring_wait_cqe(&ring, &cqe); assert(!ret && cqe->user_data == buf_idx + 1); io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return 0; } static int test_buffers_empty_buffers(void) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe = NULL; struct io_uring ring; const int nr = 5; int ret, i; char tmp_buf[1024]; struct iovec vecs[nr]; for (i = 0; i < nr; i++) { vecs[i].iov_base = 0; vecs[i].iov_len = 0; } vecs[0].iov_base = tmp_buf; vecs[0].iov_len = 10; ret = io_uring_queue_init(1, &ring, 0); if (ret) { printf("ring setup failed\n"); return 1; } ret = register_rsrc(&ring, TEST_IORING_RSRC_BUFFER, nr, vecs, NULL); if (ret) { fprintf(stderr, "rsrc register failed %i\n", ret); return 1; } /* empty to buffer */ vecs[1].iov_base = tmp_buf; vecs[1].iov_len = 10; ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, 1, &vecs[1], NULL); if (ret != 1) { fprintf(stderr, "rsrc update failed %i %i\n", ret, errno); return 1; } /* buffer to empty */ vecs[0].iov_base = 0; vecs[0].iov_len = 0; ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, 0, &vecs[0], NULL); if (ret != 1) { fprintf(stderr, "rsrc update failed %i %i\n", ret, errno); return 1; } /* zero to zero is ok */ ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, 2, &vecs[2], NULL); if (ret != 1) { fprintf(stderr, "rsrc update failed %i %i\n", ret, errno); return 1; } /* empty buf with non-zero len fails */ vecs[3].iov_base = 0; vecs[3].iov_len = 1; ret = update_rsrc(&ring, TEST_IORING_RSRC_BUFFER, 1, 3, &vecs[3], NULL); if (ret >= 0) { fprintf(stderr, "rsrc update failed %i %i\n", ret, errno); return 1; } /* test rw on empty ubuf is failed */ sqe = io_uring_get_sqe(&ring); io_uring_prep_read_fixed(sqe, pipes[0], tmp_buf, 10, 0, 2); sqe->user_data = 100; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); assert(!ret && cqe->user_data == 100); assert(cqe->res); io_uring_cqe_seen(&ring, cqe); sqe = io_uring_get_sqe(&ring); io_uring_prep_read_fixed(sqe, pipes[0], tmp_buf, 0, 0, 2); sqe->user_data = 100; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); assert(!ret && cqe->user_data == 100); assert(cqe->res); io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return 0; } static int test_files(int ring_flags) { struct io_uring_cqe *cqe = NULL; struct io_uring ring; const int nr = 50; int off = 5, i, ret, fd; __s32 files[nr]; __u64 tags[nr], tag; for (i = 0; i < nr; ++i) { files[i] = pipes[0]; tags[i] = i + 1; } ret = test_tags_generic(nr, TEST_IORING_RSRC_FILE, files, ring_flags); if (ret) return 1; ret = io_uring_queue_init(1, &ring, ring_flags); if (ret) { printf("ring setup failed\n"); return 1; } ret = register_rsrc(&ring, TEST_IORING_RSRC_FILE, nr, files, tags); if (ret) { fprintf(stderr, "rsrc register failed %i\n", ret); return 1; } /* check update did update tag */ fd = -1; ret = io_uring_register_files_update(&ring, off, &fd, 1); assert(ret == 1); ret = 
io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "io_uring wait ret=%d\n", ret); return 1; } if (cqe->user_data != tags[off]) { fprintf(stderr, "data %lx != %lx\n", (unsigned long) cqe->user_data, (unsigned long) tags[off]); return 1; } io_uring_cqe_seen(&ring, cqe); /* remove removed file, shouldn't emit old tag */ ret = io_uring_register_files_update(&ring, off, &fd, 1); assert(ret <= 1); assert(check_cq_empty(&ring)); /* non-zero tag with remove update is disallowed */ tag = 1; fd = -1; ret = update_rsrc(&ring, TEST_IORING_RSRC_FILE, 1, off + 1, &fd, &tag); assert(ret); io_uring_queue_exit(&ring); return 0; } static int test_notag(void) { struct io_uring_cqe *cqe = NULL; struct io_uring ring; int i, ret, fd; const int nr = 50; int files[nr]; ret = io_uring_queue_init(1, &ring, 0); if (ret) { printf("ring setup failed\n"); return 1; } for (i = 0; i < nr; ++i) files[i] = pipes[0]; ret = io_uring_register_files(&ring, files, nr); assert(!ret); /* default register, update shouldn't emit CQE */ fd = -1; ret = io_uring_register_files_update(&ring, 0, &fd, 1); assert(ret == 1); assert(check_cq_empty(&ring)); ret = io_uring_unregister_files(&ring); assert(!ret); ret = io_uring_peek_cqe(&ring, &cqe); /* nothing should be there */ assert(ret); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { int ring_flags[] = {0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN}; int i, ret; if (argc > 1) return 0; if (!has_rsrc_update()) { fprintf(stderr, "doesn't support rsrc tags, skip\n"); return 0; } if (pipe(pipes) < 0) { perror("pipe"); return 1; } ret = test_notag(); if (ret) { printf("test_notag failed\n"); return ret; } for (i = 0; i < sizeof(ring_flags) / sizeof(ring_flags[0]); i++) { int flag = ring_flags[i]; if (flag & IORING_SETUP_DEFER_TASKRUN && !t_probe_defer_taskrun()) continue; ret = test_files(flag); if (ret) { printf("test_tag failed, type %i\n", i); return ret; } } ret = test_buffers_update(); if (ret) { printf("test_buffers_update failed\n"); return ret; } ret = test_buffers_empty_buffers(); if (ret) { printf("test_buffers_empty_buffers failed\n"); return ret; } return 0; } liburing-2.6/test/runtests-loop.sh000077500000000000000000000003331461424365000173540ustar00rootroot00000000000000#!/usr/bin/env bash TESTS=("$@") ITER=0 while true; do ./runtests.sh "${TESTS[@]}" RET="$?" if [ "${RET}" -ne 0 ]; then echo "Tests failed at loop $ITER" break fi echo "Finished loop $ITER" ((ITER++)) done liburing-2.6/test/runtests-quiet.sh000077500000000000000000000003051461424365000175310ustar00rootroot00000000000000#!/usr/bin/env bash TESTS=("$@") RESULT_FILE=$(mktemp) ./runtests.sh "${TESTS[@]}" > "$RESULT_FILE" 2>&1 RET="$?" if [ "${RET}" -ne 0 ]; then cat "$RESULT_FILE" fi rm "$RESULT_FILE" exit $RET liburing-2.6/test/runtests.sh000077500000000000000000000071251461424365000164130ustar00rootroot00000000000000#!/usr/bin/env bash TESTS=("$@") TIMEOUT=60 DMESG_FILTER="cat" TEST_DIR=$(dirname "$0") FAILED=() SKIPPED=() TIMED_OUT=() TEST_FILES="" declare -A TEST_MAP # Only use /dev/kmsg if running as root DO_KMSG="1" [ "$(id -u)" != "0" ] && DO_KMSG="0" # Include config.local if exists and check TEST_FILES for valid devices if [ -f "$TEST_DIR/config.local" ]; then # shellcheck source=/dev/null disable=SC1091 . "$TEST_DIR/config.local" for dev in $TEST_FILES; do if [ ! -e "$dev" ]; then echo "Test file $dev not valid" exit 1 fi done for dev in "${TEST_MAP[@]}"; do if [ ! 
-e "$dev" ]; then echo "Test file in map $dev not valid" exit 1 fi done fi _check_dmesg() { local dmesg_marker="$1" local seqres="$2.seqres" if [ "$DO_KMSG" -eq 0 ]; then return 0 fi dmesg | bash -c "$DMESG_FILTER" | grep -A 9999 "$dmesg_marker" >"${seqres}.dmesg" grep -q -e "kernel BUG at" \ -e "WARNING:" \ -e "BUG:" \ -e "Oops:" \ -e "possible recursive locking detected" \ -e "Internal error" \ -e "INFO: suspicious RCU usage" \ -e "INFO: possible circular locking dependency detected" \ -e "general protection fault:" \ -e "blktests failure" \ "${seqres}.dmesg" # shellcheck disable=SC2181 if [[ $? -eq 0 ]]; then return 1 else rm -f "${seqres}.dmesg" return 0 fi } run_test() { local test_name="$1" local dev="$2" local test_exec=("./$test_name") local test_string="$test_name" local out_name="$test_name" # Specify test string to print if [ -n "$dev" ]; then test_exec+=("$dev") test_string="$test_name $dev" local suffix suffix=$(basename "$dev") out_name="$out_name.$suffix" fi # Log start of the test if [ "$DO_KMSG" -eq 1 ]; then local dmesg_marker="Running test $test_string:" echo "$dmesg_marker" > /dev/kmsg else local dmesg_marker="" fi printf "Running test %-55s" "$test_string" # Do we have to exclude the test ? echo "$TEST_EXCLUDE" | grep -w "$test_name" > /dev/null 2>&1 # shellcheck disable=SC2181 if [ $? -eq 0 ]; then echo "Test skipped" SKIPPED+=("<$test_string>") return fi # Run the test T_START=$(date +%s) timeout -s INT -k $TIMEOUT $TIMEOUT "${test_exec[@]}" local status=$? T_END=$(date +%s) if [ -e ./core ]; then mv core "core-$test_name" fi # Check test status if [ "$status" -eq 124 ]; then echo "Test $test_name timed out (may not be a failure)" TIMED_OUT+=("<$test_string>") elif [ "$status" -eq 77 ]; then echo "Skipped" SKIPPED+=("<$test_string>") elif [ "$status" -ne 0 ]; then echo "Test $test_name failed with ret $status" FAILED+=("<$test_string>") elif ! _check_dmesg "$dmesg_marker" "$test_name"; then echo "Test $test_name failed dmesg check" FAILED+=("<$test_string>") else if [ -f "output/$out_name" ]; then T_PREV=$(cat "output/$out_name") else T_PREV="" fi T_DIFF=$((T_END-T_START)) if [ -n "$T_PREV" ]; then echo "$T_DIFF sec [$T_PREV]" else echo "$T_DIFF sec" fi echo $T_DIFF > "output/$out_name" fi } # Run all specified tests for tst in "${TESTS[@]}"; do if [ ! -d output ]; then mkdir -p output fi if [ -z "${TEST_MAP[$tst]}" ]; then run_test "$tst" if [ -n "$TEST_FILES" ]; then for dev in $TEST_FILES; do run_test "$tst" "$dev" done fi else run_test "$tst" "${TEST_MAP[$tst]}" fi done if [ "${#TIMED_OUT[*]}" -ne 0 ]; then echo "Tests timed out (${#TIMED_OUT[*]}): ${TIMED_OUT[*]}" fi if [ "${#FAILED[*]}" -ne 0 ]; then echo "Tests failed (${#FAILED[*]}): ${FAILED[*]}" exit 1 elif [ "${#SKIPPED[*]}" -ne 0 ] && [ -n "$TEST_GNU_EXITCODE" ]; then exit 77 else echo "All tests passed" exit 0 fi liburing-2.6/test/rw_merge_test.c000066400000000000000000000042121461424365000171710ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Regression test for incorrect async_list io_should_merge() logic * Bug was fixed in 5.5 by (commit: 561fb04 io_uring: replace workqueue usage with io-wq") * Affects 5.4 lts branch, at least 5.4.106 is affected. 
*/ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret, fd, pipe1[2]; char buf[4096]; struct iovec vec = { .iov_base = buf, .iov_len = sizeof(buf) }; struct __kernel_timespec ts = {.tv_sec = 3, .tv_nsec = 0}; if (argc > 1) return 0; ret = pipe(pipe1); assert(!ret); fd = open("testfile", O_RDWR | O_CREAT, 0644); assert(fd >= 0); unlink("testfile"); ret = ftruncate(fd, 4096); assert(!ret); ret = t_create_ring(4, &ring, 0); if (ret == T_SETUP_SKIP) return 0; else if (ret < 0) return 1; /* REQ1 */ sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, pipe1[0], &vec, 1, 0); sqe->user_data = 1; /* REQ2 */ sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, fd, &vec, 1, 4096); sqe->user_data = 2; ret = io_uring_submit(&ring); assert(ret == 2); ret = io_uring_wait_cqe(&ring, &cqe); assert(!ret); assert(cqe->res == 0); assert(cqe->user_data == 2); io_uring_cqe_seen(&ring, cqe); /* * REQ3 * Prepare a request adjacent to the previous one, so the merge logic may * want to link it to the previous request; because of the bug, it may * instead be merged with the REQ1 pipe read */ sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, fd, &vec, 1, 2048); sqe->user_data = 3; ret = io_uring_submit(&ring); assert(ret == 1); /* * The read may get stuck if the bug incorrectly merged this request with * the REQ1 pipe read, which never completes */ ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts); if (ret == -ETIME) { printf("TEST_FAIL: readv req3 stuck\n"); return 1; } assert(!ret); assert(cqe->res == 2048); assert(cqe->user_data == 3); io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return 0; } liburing-2.6/test/self.c000066400000000000000000000033411461424365000152560ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test that pathname resolution works from async context when * using /proc/self/ which should be the original submitting task, not the * async worker.
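 *
 * Concretely, the test opens /proc/self/comm via IORING_OP_OPENAT2 and
 * checks that the name read back is "self" (this test binary), not the
 * comm of an io-wq worker thread.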
* */ #include #include #include #include #include #include #include "liburing.h" static int io_openat2(struct io_uring *ring, const char *path, int dfd) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct open_how how; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } memset(&how, 0, sizeof(how)); how.flags = O_RDONLY; io_uring_prep_openat2(sqe, dfd, path, &how); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; err: return -1; } int main(int argc, char *argv[]) { struct io_uring ring; char buf[64]; int ret; if (argc > 1) return 0; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } ret = io_openat2(&ring, "/proc/self/comm", -1); if (ret < 0) { if (ret == -EOPNOTSUPP) return 0; if (ret == -EINVAL) { fprintf(stdout, "openat2 not supported, skipping\n"); return 0; } fprintf(stderr, "openat2 failed: %s\n", strerror(-ret)); return 1; } memset(buf, 0, sizeof(buf)); ret = read(ret, buf, sizeof(buf)); if (ret < 0) { perror("read"); return 1; } if (strncmp(buf, "self", 4)) { fprintf(stderr, "got comm=<%s>, wanted \n", buf); return 1; } return 0; } liburing-2.6/test/send-zerocopy.c000066400000000000000000000533211461424365000171310ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define MAX_MSG 128 #define HOST "127.0.0.1" #define HOSTV6 "::1" #define MAX_IOV 32 #define CORK_REQS 5 #define RX_TAG 10000 #define BUFFER_OFFSET 41 #ifndef ARRAY_SIZE #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) #endif enum { BUF_T_NORMAL, BUF_T_SMALL, BUF_T_NONALIGNED, BUF_T_LARGE, BUF_T_HUGETLB, __BUF_NR, }; /* 32MB, should be enough to trigger a short send */ #define LARGE_BUF_SIZE (1U << 25) static size_t page_sz; static char *tx_buffer, *rx_buffer; static struct iovec buffers_iov[__BUF_NR]; static bool has_sendzc; static bool has_sendmsg; static int probe_zc_support(void) { struct io_uring ring; struct io_uring_probe *p; int ret; has_sendzc = has_sendmsg = false; ret = io_uring_queue_init(1, &ring, 0); if (ret) return -1; p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op)); if (!p) return -1; ret = io_uring_register_probe(&ring, p, 256); if (ret) return -1; has_sendzc = p->ops_len > IORING_OP_SEND_ZC; has_sendmsg = p->ops_len > IORING_OP_SENDMSG_ZC; io_uring_queue_exit(&ring); free(p); return 0; } static bool check_cq_empty(struct io_uring *ring) { struct io_uring_cqe *cqe = NULL; int ret; ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */ return ret == -EAGAIN; } static int test_basic_send(struct io_uring *ring, int sock_tx, int sock_rx) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int msg_flags = 0; unsigned zc_flags = 0; int payload_size = 100; int ret; sqe = io_uring_get_sqe(ring); io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size, msg_flags, zc_flags); sqe->user_data = 1; ret = io_uring_submit(ring); assert(ret == 1); ret = io_uring_wait_cqe(ring, &cqe); assert(!ret && 
cqe->user_data == 1); if (cqe->res != payload_size) { fprintf(stderr, "send failed %i\n", cqe->res); return T_EXIT_FAIL; } assert(cqe->flags & IORING_CQE_F_MORE); io_uring_cqe_seen(ring, cqe); ret = io_uring_wait_cqe(ring, &cqe); assert(!ret); assert(cqe->user_data == 1); assert(cqe->flags & IORING_CQE_F_NOTIF); assert(!(cqe->flags & IORING_CQE_F_MORE)); io_uring_cqe_seen(ring, cqe); assert(check_cq_empty(ring)); ret = recv(sock_rx, rx_buffer, payload_size, MSG_TRUNC); assert(ret == payload_size); return T_EXIT_PASS; } static int test_send_faults_check(struct io_uring *ring, int expected) { struct io_uring_cqe *cqe; int ret, nr_cqes = 0; bool more = true; while (more) { nr_cqes++; ret = io_uring_wait_cqe(ring, &cqe); assert(!ret); assert(cqe->user_data == 1); if (nr_cqes == 1 && (cqe->flags & IORING_CQE_F_NOTIF)) { fprintf(stderr, "test_send_faults_check notif came first\n"); return -1; } if (!(cqe->flags & IORING_CQE_F_NOTIF)) { if (cqe->res != expected) { fprintf(stderr, "invalid cqe res %i vs expected %i, " "user_data %i\n", cqe->res, expected, (int)cqe->user_data); return -1; } } else { if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) { fprintf(stderr, "invalid notif cqe %i %i\n", cqe->res, cqe->flags); return -1; } } more = cqe->flags & IORING_CQE_F_MORE; io_uring_cqe_seen(ring, cqe); } if (nr_cqes > 2) { fprintf(stderr, "test_send_faults_check() too many CQEs %i\n", nr_cqes); return -1; } assert(check_cq_empty(ring)); return 0; } static int test_send_faults(int sock_tx, int sock_rx) { struct io_uring_sqe *sqe; int msg_flags = 0; unsigned zc_flags = 0; int ret, payload_size = 100; struct io_uring ring; ret = io_uring_queue_init(32, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return -1; } /* invalid buffer */ sqe = io_uring_get_sqe(&ring); io_uring_prep_send_zc(sqe, sock_tx, (void *)1UL, payload_size, msg_flags, zc_flags); sqe->user_data = 1; ret = io_uring_submit(&ring); assert(ret == 1); ret = test_send_faults_check(&ring, -EFAULT); if (ret) { fprintf(stderr, "test_send_faults with invalid buf failed\n"); return -1; } /* invalid address */ sqe = io_uring_get_sqe(&ring); io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size, msg_flags, zc_flags); io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)1UL, sizeof(struct sockaddr_in6)); sqe->user_data = 1; ret = io_uring_submit(&ring); assert(ret == 1); ret = test_send_faults_check(&ring, -EFAULT); if (ret) { fprintf(stderr, "test_send_faults with invalid addr failed\n"); return -1; } /* invalid send/recv flags */ sqe = io_uring_get_sqe(&ring); io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size, msg_flags, ~0U); sqe->user_data = 1; ret = io_uring_submit(&ring); assert(ret == 1); ret = test_send_faults_check(&ring, -EINVAL); if (ret) { fprintf(stderr, "test_send_faults with invalid flags failed\n"); return -1; } return T_EXIT_PASS; } static int create_socketpair_ip(struct sockaddr_storage *addr, int *sock_client, int *sock_server, bool ipv6, bool client_connect, bool msg_zc, bool tcp) { socklen_t addr_size; int family, sock, listen_sock = -1; int ret; memset(addr, 0, sizeof(*addr)); if (ipv6) { struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr; family = AF_INET6; saddr->sin6_family = family; saddr->sin6_port = htons(0); addr_size = sizeof(*saddr); } else { struct sockaddr_in *saddr = (struct sockaddr_in *)addr; family = AF_INET; saddr->sin_family = family; saddr->sin_port = htons(0); saddr->sin_addr.s_addr = htonl(INADDR_ANY); addr_size = sizeof(*saddr); } /* server sock setup */ 
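	/*
	 * For TCP this creates a listening socket and the real server fd is
	 * produced by accept() further down; for UDP the datagram socket
	 * itself is the server side.
	 */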
if (tcp) { sock = listen_sock = socket(family, SOCK_STREAM, IPPROTO_TCP); } else { sock = *sock_server = socket(family, SOCK_DGRAM, 0); } if (sock < 0) { perror("socket"); return 1; } ret = bind(sock, (struct sockaddr *)addr, addr_size); if (ret < 0) { perror("bind"); return 1; } ret = getsockname(sock, (struct sockaddr *)addr, &addr_size); if (ret < 0) { fprintf(stderr, "getsockname failed %i\n", errno); return 1; } if (tcp) { ret = listen(sock, 128); assert(ret != -1); } if (ipv6) { struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr; inet_pton(AF_INET6, HOSTV6, &(saddr->sin6_addr)); } else { struct sockaddr_in *saddr = (struct sockaddr_in *)addr; inet_pton(AF_INET, HOST, &saddr->sin_addr); } /* client sock setup */ if (tcp) { *sock_client = socket(family, SOCK_STREAM, IPPROTO_TCP); assert(client_connect); } else { *sock_client = socket(family, SOCK_DGRAM, 0); } if (*sock_client < 0) { perror("socket"); return 1; } if (client_connect) { ret = connect(*sock_client, (struct sockaddr *)addr, addr_size); if (ret < 0) { perror("connect"); return 1; } } if (msg_zc) { #ifdef SO_ZEROCOPY int val = 1; /* * NOTE: apps must not set SO_ZEROCOPY when using io_uring zc. * It's only here to test interactions with MSG_ZEROCOPY. */ if (setsockopt(*sock_client, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { perror("setsockopt zc"); return 1; } #else fprintf(stderr, "no SO_ZEROCOPY\n"); return 1; #endif } if (tcp) { *sock_server = accept(listen_sock, NULL, NULL); if (!*sock_server) { fprintf(stderr, "can't accept\n"); return 1; } close(listen_sock); } return 0; } struct send_conf { bool fixed_buf; bool mix_register; bool cork; bool force_async; bool use_sendmsg; bool tcp; bool zc; bool iovec; bool long_iovec; bool poll_first; int buf_index; struct sockaddr_storage *addr; }; static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_server, struct send_conf *conf) { struct iovec iov[MAX_IOV]; struct msghdr msghdr[CORK_REQS]; const unsigned zc_flags = 0; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int nr_reqs = conf->cork ? CORK_REQS : 1; int i, ret, nr_cqes, addr_len = 0; size_t send_size = buffers_iov[conf->buf_index].iov_len; size_t chunk_size = send_size / nr_reqs; size_t chunk_size_last = send_size - chunk_size * (nr_reqs - 1); char *buf = buffers_iov[conf->buf_index].iov_base; assert(MAX_IOV >= CORK_REQS); if (conf->addr) { sa_family_t fam = ((struct sockaddr_in *)conf->addr)->sin_family; addr_len = (fam == AF_INET) ? 
sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); } memset(rx_buffer, 0, send_size); for (i = 0; i < nr_reqs; i++) { bool real_fixed_buf = conf->fixed_buf; size_t cur_size = chunk_size; int msg_flags = MSG_WAITALL; if (conf->mix_register) real_fixed_buf = rand() & 1; if (i != nr_reqs - 1) msg_flags |= MSG_MORE; else cur_size = chunk_size_last; sqe = io_uring_get_sqe(ring); if (!conf->use_sendmsg) { if (conf->zc) { io_uring_prep_send_zc(sqe, sock_client, buf + i * chunk_size, cur_size, msg_flags, zc_flags); } else { io_uring_prep_send(sqe, sock_client, buf + i * chunk_size, cur_size, msg_flags); } if (real_fixed_buf) { sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; sqe->buf_index = conf->buf_index; } if (conf->addr) io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)conf->addr, addr_len); } else { struct iovec *io; int iov_len; if (conf->zc) io_uring_prep_sendmsg_zc(sqe, sock_client, &msghdr[i], msg_flags); else io_uring_prep_sendmsg(sqe, sock_client, &msghdr[i], msg_flags); if (!conf->iovec) { io = &iov[i]; iov_len = 1; iov[i].iov_len = cur_size; iov[i].iov_base = buf + i * chunk_size; } else { char *it = buf; int j; assert(nr_reqs == 1); iov_len = conf->long_iovec ? MAX_IOV : 4; io = iov; for (j = 0; j < iov_len; j++) io[j].iov_len = 1; /* first want to be easily advanced */ io[0].iov_base = it; it += io[0].iov_len; /* this should cause retry */ io[1].iov_len = chunk_size - iov_len + 1; io[1].iov_base = it; it += io[1].iov_len; /* fill the rest */ for (j = 2; j < iov_len; j++) { io[j].iov_base = it; it += io[j].iov_len; } } memset(&msghdr[i], 0, sizeof(msghdr[i])); msghdr[i].msg_iov = io; msghdr[i].msg_iovlen = iov_len; if (conf->addr) { msghdr[i].msg_name = conf->addr; msghdr[i].msg_namelen = addr_len; } } sqe->user_data = i; if (conf->force_async) sqe->flags |= IOSQE_ASYNC; if (conf->poll_first) sqe->ioprio |= IORING_RECVSEND_POLL_FIRST; if (i != nr_reqs - 1) sqe->flags |= IOSQE_IO_LINK; } sqe = io_uring_get_sqe(ring); io_uring_prep_recv(sqe, sock_server, rx_buffer, send_size, MSG_WAITALL); sqe->user_data = RX_TAG; ret = io_uring_submit(ring); if (ret != nr_reqs + 1) { fprintf(stderr, "submit failed, got %i expected %i\n", ret, nr_reqs); return 1; } nr_cqes = nr_reqs + 1; for (i = 0; i < nr_cqes; i++) { int expected = chunk_size; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret); return 1; } if (cqe->user_data == RX_TAG) { if (cqe->res != send_size) { fprintf(stderr, "rx failed res: %i, expected %i\n", cqe->res, (int)send_size); return 1; } io_uring_cqe_seen(ring, cqe); continue; } if ((cqe->flags & IORING_CQE_F_MORE) && (cqe->flags & IORING_CQE_F_NOTIF)) { fprintf(stderr, "unexpected cflags %i res %i\n", cqe->flags, cqe->res); return 1; } if (cqe->user_data >= nr_reqs) { fprintf(stderr, "invalid user_data %lu\n", (unsigned long)cqe->user_data); return 1; } if (!(cqe->flags & IORING_CQE_F_NOTIF)) { if (cqe->flags & IORING_CQE_F_MORE) nr_cqes++; if (cqe->user_data == nr_reqs - 1) expected = chunk_size_last; if (cqe->res != expected) { fprintf(stderr, "invalid cqe->res %d expected %d\n", cqe->res, expected); return 1; } } io_uring_cqe_seen(ring, cqe); } for (i = 0; i < send_size; i++) { if (buf[i] != rx_buffer[i]) { fprintf(stderr, "botched data, first mismated byte %i, " "%u vs %u\n", i, buf[i], rx_buffer[i]); return 1; } } return 0; } static int test_inet_send(struct io_uring *ring) { struct send_conf conf; struct sockaddr_storage addr; int sock_client = -1, sock_server = -1; int ret, j, i; int buf_index; for (j = 0; j < 32; 
j++) { bool ipv6 = j & 1; bool client_connect = j & 2; bool msg_zc_set = j & 4; bool tcp = j & 8; bool swap_sockets = j & 16; if (tcp && !client_connect) continue; if (swap_sockets && !tcp) continue; #ifndef SO_ZEROCOPY if (msg_zc_set) continue; #endif ret = create_socketpair_ip(&addr, &sock_client, &sock_server, ipv6, client_connect, msg_zc_set, tcp); if (ret) { fprintf(stderr, "sock prep failed %d\n", ret); return 1; } if (swap_sockets) { int tmp_sock = sock_client; sock_client = sock_server; sock_server = tmp_sock; } for (i = 0; i < 1024; i++) { bool regbuf; conf.use_sendmsg = i & 1; conf.poll_first = i & 2; conf.fixed_buf = i & 4; conf.addr = (i & 8) ? &addr : NULL; conf.cork = i & 16; conf.mix_register = i & 32; conf.force_async = i & 64; conf.zc = i & 128; conf.iovec = i & 256; conf.long_iovec = i & 512; conf.tcp = tcp; regbuf = conf.mix_register || conf.fixed_buf; if (conf.iovec && (!conf.use_sendmsg || regbuf || conf.cork)) continue; if (!conf.zc) { if (regbuf) continue; /* * Non zerocopy send w/ addr was added together with sendmsg_zc, * skip if we the kernel doesn't support it. */ if (conf.addr && !has_sendmsg) continue; } if (tcp && (conf.cork || conf.addr)) continue; if (conf.mix_register && (!conf.cork || conf.fixed_buf)) continue; if (!client_connect && conf.addr == NULL) continue; if (conf.use_sendmsg && (regbuf || !has_sendmsg)) continue; if (msg_zc_set && !conf.zc) continue; for (buf_index = 0; buf_index < ARRAY_SIZE(buffers_iov); buf_index++) { size_t len = buffers_iov[buf_index].iov_len; if (!buffers_iov[buf_index].iov_base) continue; if (!tcp && len > 4 * page_sz) continue; conf.buf_index = buf_index; ret = do_test_inet_send(ring, sock_client, sock_server, &conf); if (ret) { fprintf(stderr, "send failed fixed buf %i, " "conn %i, addr %i, cork %i\n", conf.fixed_buf, client_connect, !!conf.addr, conf.cork); return 1; } } } close(sock_client); close(sock_server); } return 0; } static int test_async_addr(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct sockaddr_storage addr; int sock_tx = -1, sock_rx = -1; struct __kernel_timespec ts; int ret; ts.tv_sec = 1; ts.tv_nsec = 0; ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, false, false, false); if (ret) { fprintf(stderr, "sock prep failed %d\n", ret); return 1; } sqe = io_uring_get_sqe(ring); io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ETIME_SUCCESS); sqe->user_data = 1; sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0, 0); sqe->user_data = 2; io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)&addr, sizeof(struct sockaddr_in6)); ret = io_uring_submit(ring); assert(ret == 2); memset(&addr, 0, sizeof(addr)); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret); return 1; } if (cqe->user_data != 1 || cqe->res != -ETIME) { fprintf(stderr, "invalid timeout res %i %i\n", (int)cqe->user_data, cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret); return 1; } if (cqe->user_data != 2 || cqe->res != 1) { fprintf(stderr, "invalid send %i %i\n", (int)cqe->user_data, cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); ret = recv(sock_rx, rx_buffer, 1, MSG_TRUNC); assert(ret == 1); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret); return 1; } assert(cqe->flags & IORING_CQE_F_NOTIF); io_uring_cqe_seen(ring, cqe); 
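	/*
	 * Note the sockaddr was zeroed right after submit, while the linked
	 * timeout delayed the send; reaching this point therefore suggests
	 * the kernel captured the address at prep/submit time instead of
	 * reading user memory when the request finally ran.
	 */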
close(sock_tx); close(sock_rx); return 0; } /* see also send_recv.c:test_invalid */ static int test_invalid_zc(int fds[2]) { struct io_uring ring; int ret; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; bool notif = false; if (!has_sendmsg) return 0; ret = t_create_ring(8, &ring, 0); if (ret) return ret; sqe = io_uring_get_sqe(&ring); io_uring_prep_sendmsg(sqe, fds[0], NULL, MSG_WAITALL); sqe->opcode = IORING_OP_SENDMSG_ZC; sqe->flags |= IOSQE_ASYNC; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit failed %i\n", ret); return ret; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) return 1; if (cqe->flags & IORING_CQE_F_MORE) notif = true; io_uring_cqe_seen(&ring, cqe); if (notif) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) return 1; io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); return 0; } static int run_basic_tests(void) { struct sockaddr_storage addr; int ret, i, sp[2]; /* create TCP IPv6 pair */ ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true); if (ret) { fprintf(stderr, "sock prep failed %d\n", ret); return -1; } for (i = 0; i < 2; i++) { struct io_uring ring; unsigned ring_flags = 0; if (i & 1) ring_flags |= IORING_SETUP_DEFER_TASKRUN; ret = io_uring_queue_init(32, &ring, ring_flags); if (ret) { if (ret == -EINVAL) continue; fprintf(stderr, "queue init failed: %d\n", ret); return -1; } ret = test_basic_send(&ring, sp[0], sp[1]); if (ret) { fprintf(stderr, "test_basic_send() failed\n"); return -1; } ret = test_send_faults(sp[0], sp[1]); if (ret) { fprintf(stderr, "test_send_faults() failed\n"); return -1; } ret = test_invalid_zc(sp); if (ret) { fprintf(stderr, "test_invalid_zc() failed\n"); return -1; } ret = test_async_addr(&ring); if (ret) { fprintf(stderr, "test_async_addr() failed\n"); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); } close(sp[0]); close(sp[1]); return 0; } int main(int argc, char *argv[]) { size_t len; int ret, i; if (argc > 1) return T_EXIT_SKIP; ret = probe_zc_support(); if (ret) { printf("probe failed\n"); return T_EXIT_FAIL; } if (!has_sendzc) { printf("no IORING_OP_SEND_ZC support, skip\n"); return T_EXIT_SKIP; } page_sz = sysconf(_SC_PAGESIZE); len = LARGE_BUF_SIZE; tx_buffer = aligned_alloc(page_sz, len); rx_buffer = aligned_alloc(page_sz, len); if (tx_buffer && rx_buffer) { buffers_iov[BUF_T_LARGE].iov_base = tx_buffer; buffers_iov[BUF_T_LARGE].iov_len = len; } else { if (tx_buffer) free(tx_buffer); if (rx_buffer) free(rx_buffer); printf("skip large buffer tests, can't alloc\n"); len = 2 * page_sz; tx_buffer = aligned_alloc(page_sz, len); rx_buffer = aligned_alloc(page_sz, len); } if (!tx_buffer || !rx_buffer) { fprintf(stderr, "can't allocate buffers\n"); return T_EXIT_FAIL; } srand((unsigned)time(NULL)); for (i = 0; i < len; i++) tx_buffer[i] = i; memset(rx_buffer, 0, len); buffers_iov[BUF_T_NORMAL].iov_base = tx_buffer + page_sz; buffers_iov[BUF_T_NORMAL].iov_len = page_sz; buffers_iov[BUF_T_SMALL].iov_base = tx_buffer; buffers_iov[BUF_T_SMALL].iov_len = 137; buffers_iov[BUF_T_NONALIGNED].iov_base = tx_buffer + BUFFER_OFFSET; buffers_iov[BUF_T_NONALIGNED].iov_len = 2 * page_sz - BUFFER_OFFSET - 13; if (len == LARGE_BUF_SIZE) { void *huge_page; int off = page_sz + 27; len = 1U << 22; huge_page = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS, -1, 0); if (huge_page != MAP_FAILED) { buffers_iov[BUF_T_HUGETLB].iov_base = huge_page + off; buffers_iov[BUF_T_HUGETLB].iov_len = len - off; } } ret = run_basic_tests(); if (ret) return 
T_EXIT_FAIL; for (i = 0; i < 2; i++) { struct io_uring ring; unsigned ring_flags = 0; if (i & 1) ring_flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN; ret = io_uring_queue_init(32, &ring, ring_flags); if (ret) { if (ret == -EINVAL) continue; fprintf(stderr, "queue init failed: %d\n", ret); return -1; } ret = t_register_buffers(&ring, buffers_iov, ARRAY_SIZE(buffers_iov)); if (ret == T_SETUP_SKIP) { fprintf(stderr, "can't register bufs, skip\n"); goto out; } else if (ret != T_SETUP_OK) { fprintf(stderr, "buffer registration failed %i\n", ret); return T_EXIT_FAIL; } if (buffers_iov[BUF_T_HUGETLB].iov_base) { buffers_iov[BUF_T_HUGETLB].iov_base += 13; buffers_iov[BUF_T_HUGETLB].iov_len -= 26; } if (buffers_iov[BUF_T_LARGE].iov_base) { buffers_iov[BUF_T_LARGE].iov_base += 13; buffers_iov[BUF_T_LARGE].iov_len -= 26; } ret = test_inet_send(&ring); if (ret) { fprintf(stderr, "test_inet_send() failed (defer_taskrun %i)\n", ring_flags & IORING_SETUP_DEFER_TASKRUN); return T_EXIT_FAIL; } if (buffers_iov[BUF_T_HUGETLB].iov_base) { buffers_iov[BUF_T_HUGETLB].iov_base -= 13; buffers_iov[BUF_T_HUGETLB].iov_len += 26; } if (buffers_iov[BUF_T_LARGE].iov_base) { buffers_iov[BUF_T_LARGE].iov_base -= 13; buffers_iov[BUF_T_LARGE].iov_len += 26; } out: io_uring_queue_exit(&ring); } return T_EXIT_PASS; } liburing-2.6/test/send_recv.c000066400000000000000000000147501461424365000163030ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Simple test case showing using send and recv through io_uring */ #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static char str[] = "This is a test of send and recv over io_uring!"; #define MAX_MSG 128 #define PORT 10202 #define HOST "127.0.0.1" static int recv_prep(struct io_uring *ring, struct iovec *iov, int *sock, int registerfiles) { struct sockaddr_in saddr; struct io_uring_sqe *sqe; int sockfd, ret, val, use_fd; memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = htonl(INADDR_ANY); saddr.sin_port = htons(PORT); sockfd = socket(AF_INET, SOCK_DGRAM, 0); if (sockfd < 0) { perror("socket"); return 1; } val = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); ret = bind(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); if (ret < 0) { perror("bind"); goto err; } if (registerfiles) { ret = io_uring_register_files(ring, &sockfd, 1); if (ret) { fprintf(stderr, "file reg failed\n"); goto err; } use_fd = 0; } else { use_fd = sockfd; } sqe = io_uring_get_sqe(ring); io_uring_prep_recv(sqe, use_fd, iov->iov_base, iov->iov_len, 0); if (registerfiles) sqe->flags |= IOSQE_FIXED_FILE; sqe->user_data = 2; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } *sock = sockfd; return 0; err: close(sockfd); return 1; } static int do_recv(struct io_uring *ring, struct iovec *iov) { struct io_uring_cqe *cqe; int ret; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stdout, "wait_cqe: %d\n", ret); goto err; } if (cqe->res == -EINVAL) { fprintf(stdout, "recv not supported, skipping\n"); return 0; } if (cqe->res < 0) { fprintf(stderr, "failed cqe: %d\n", cqe->res); goto err; } if (cqe->res -1 != strlen(str)) { fprintf(stderr, "got wrong length: %d/%d\n", cqe->res, (int) strlen(str) + 1); goto err; } if (strcmp(str, iov->iov_base)) { fprintf(stderr, "string mismatch\n"); goto err; } return 0; err: return 1; } struct recv_data { pthread_mutex_t mutex; int use_sqthread; int registerfiles; }; 
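/*
 * Thread handshake: test() locks rd->mutex before spawning recv_fn(),
 * which unlocks it only once its recv SQE has been submitted. test() then
 * takes the lock a second time before calling do_send(), so the sender can
 * never race ahead of an unarmed receiver.
 */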
static void *recv_fn(void *data) { struct recv_data *rd = data; char buf[MAX_MSG + 1]; struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) - 1, }; struct io_uring_params p = { }; struct io_uring ring; int ret, sock; if (rd->use_sqthread) p.flags = IORING_SETUP_SQPOLL; ret = t_create_ring_params(1, &ring, &p); if (ret == T_SETUP_SKIP) { pthread_mutex_unlock(&rd->mutex); ret = 0; goto err; } else if (ret < 0) { pthread_mutex_unlock(&rd->mutex); goto err; } if (rd->use_sqthread && !rd->registerfiles) { if (!(p.features & IORING_FEAT_SQPOLL_NONFIXED)) { fprintf(stdout, "Non-registered SQPOLL not available, skipping\n"); pthread_mutex_unlock(&rd->mutex); goto err; } } ret = recv_prep(&ring, &iov, &sock, rd->registerfiles); if (ret) { fprintf(stderr, "recv_prep failed: %d\n", ret); goto err; } pthread_mutex_unlock(&rd->mutex); ret = do_recv(&ring, &iov); close(sock); io_uring_queue_exit(&ring); err: return (void *)(intptr_t)ret; } static int do_send(void) { struct sockaddr_in saddr; struct iovec iov = { .iov_base = str, .iov_len = sizeof(str), }; struct io_uring ring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int sockfd, ret; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return 1; } memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_port = htons(PORT); inet_pton(AF_INET, HOST, &saddr.sin_addr); sockfd = socket(AF_INET, SOCK_DGRAM, 0); if (sockfd < 0) { perror("socket"); goto err2; } ret = connect(sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); if (ret < 0) { perror("connect"); goto err; } sqe = io_uring_get_sqe(&ring); io_uring_prep_send(sqe, sockfd, iov.iov_base, iov.iov_len, 0); sqe->user_data = 1; ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(&ring, &cqe); if (cqe->res == -EINVAL) { fprintf(stdout, "send not supported, skipping\n"); goto err; } if (cqe->res != iov.iov_len) { fprintf(stderr, "failed cqe: %d\n", cqe->res); goto err; } close(sockfd); io_uring_queue_exit(&ring); return 0; err: close(sockfd); err2: io_uring_queue_exit(&ring); return 1; } static int test(int use_sqthread, int regfiles) { pthread_mutexattr_t attr; pthread_t recv_thread; struct recv_data rd; int ret; void *retval; pthread_mutexattr_init(&attr); pthread_mutexattr_setpshared(&attr, 1); pthread_mutex_init(&rd.mutex, &attr); pthread_mutex_lock(&rd.mutex); rd.use_sqthread = use_sqthread; rd.registerfiles = regfiles; ret = pthread_create(&recv_thread, NULL, recv_fn, &rd); if (ret) { fprintf(stderr, "Thread create failed: %d\n", ret); pthread_mutex_unlock(&rd.mutex); return 1; } pthread_mutex_lock(&rd.mutex); do_send(); pthread_join(recv_thread, &retval); return (intptr_t)retval; } static int test_invalid(void) { struct io_uring ring; int ret, i; int fds[2]; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; ret = t_create_ring(8, &ring, IORING_SETUP_SUBMIT_ALL); if (ret) { if (ret == -EINVAL) return 0; return ret; } ret = t_create_socket_pair(fds, true); if (ret) return ret; sqe = io_uring_get_sqe(&ring); io_uring_prep_sendmsg(sqe, fds[0], NULL, MSG_WAITALL); sqe->flags |= IOSQE_ASYNC; sqe = io_uring_get_sqe(&ring); io_uring_prep_recvmsg(sqe, fds[1], NULL, 0); sqe->flags |= IOSQE_ASYNC; ret = io_uring_submit_and_wait(&ring, 2); if (ret != 2) return ret; for (i = 0; i < 2; i++) { ret = io_uring_peek_cqe(&ring, &cqe); if (ret || cqe->res != -EFAULT) return -1; io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); close(fds[0]); close(fds[1]); 
return 0; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test_invalid(); if (ret) { fprintf(stderr, "test_invalid failed\n"); return ret; } ret = test(0, 0); if (ret) { fprintf(stderr, "test sqthread=0 failed\n"); return ret; } ret = test(1, 1); if (ret) { fprintf(stderr, "test sqthread=1 reg=1 failed\n"); return ret; } ret = test(1, 0); if (ret) { fprintf(stderr, "test sqthread=1 reg=0 failed\n"); return ret; } return 0; } liburing-2.6/test/send_recvmsg.c000066400000000000000000000216741461424365000170150ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Simple test case showing using sendmsg and recvmsg through io_uring */ #include #include #include #include #include #include #include #include #include #include #include "liburing.h" static char str[] = "This is a test of sendmsg and recvmsg over io_uring!"; static int ud; #define MAX_MSG 128 #define PORT 10203 #define HOST "127.0.0.1" #define BUF_BGID 10 #define BUF_BID 89 #define MAX_IOV_COUNT 10 static int no_pbuf_ring; static int recv_prep(struct io_uring *ring, int *sockfd, struct iovec iov[], int iov_count, int bgid, int async) { struct sockaddr_in saddr; struct msghdr msg; struct io_uring_sqe *sqe; int ret, val = 1; memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = htonl(INADDR_ANY); saddr.sin_port = htons(PORT); *sockfd = socket(AF_INET, SOCK_DGRAM, 0); if (*sockfd < 0) { perror("socket"); return 1; } val = 1; setsockopt(*sockfd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); setsockopt(*sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); ret = bind(*sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); if (ret < 0) { perror("bind"); goto err; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "io_uring_get_sqe failed\n"); return 1; } io_uring_prep_recvmsg(sqe, *sockfd, &msg, 0); if (bgid) { iov->iov_base = NULL; sqe->flags |= IOSQE_BUFFER_SELECT; sqe->buf_group = bgid; iov_count = 1; } sqe->user_data = ++ud; if (async) sqe->flags |= IOSQE_ASYNC; memset(&msg, 0, sizeof(msg)); msg.msg_namelen = sizeof(struct sockaddr_in); msg.msg_iov = iov; msg.msg_iovlen = iov_count; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } return 0; err: close(*sockfd); return 1; } struct recv_data { pthread_mutex_t *mutex; int buf_select; int buf_ring; int no_buf_add; int iov_count; int async; }; static int do_recvmsg(struct io_uring *ring, char buf[MAX_MSG + 1], struct recv_data *rd) { struct io_uring_cqe *cqe; int ret; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stdout, "wait_cqe: %d\n", ret); goto err; } if (cqe->res < 0) { if (rd->no_buf_add && (rd->buf_select || rd->buf_ring)) return 0; fprintf(stderr, "%s: failed cqe: %d\n", __FUNCTION__, cqe->res); goto err; } if (cqe->flags & IORING_CQE_F_BUFFER) { int bid = cqe->flags >> 16; if (bid != BUF_BID) fprintf(stderr, "Buffer ID mismatch %d\n", bid); } if (rd->no_buf_add && (rd->buf_ring || rd->buf_select)) { fprintf(stderr, "Expected -ENOBUFS: %d\n", cqe->res); goto err; } if (cqe->res -1 != strlen(str)) { fprintf(stderr, "got wrong length: %d/%d\n", cqe->res, (int) strlen(str) + 1); goto err; } if (strncmp(str, buf, MAX_MSG + 1)) { fprintf(stderr, "string mismatch\n"); goto err; } return 0; err: return 1; } static void init_iov(struct iovec iov[MAX_IOV_COUNT], int iov_to_use, char buf[MAX_MSG + 1]) { int i, last_idx = iov_to_use - 1; assert(0 < iov_to_use && iov_to_use <= MAX_IOV_COUNT); for (i = 0; i < last_idx; ++i) { iov[i].iov_base = buf + i; 
iov[i].iov_len = 1; } iov[last_idx].iov_base = buf + last_idx; iov[last_idx].iov_len = MAX_MSG - last_idx; } static void *recv_fn(void *data) { struct recv_data *rd = data; pthread_mutex_t *mutex = rd->mutex; struct io_uring_buf_ring *br = NULL; char buf[MAX_MSG + 1]; struct iovec iov[MAX_IOV_COUNT]; struct io_uring ring; int ret, sockfd; if (rd->buf_ring && no_pbuf_ring) goto out_no_ring; init_iov(iov, rd->iov_count, buf); ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); goto err; } if ((rd->buf_ring || rd->buf_select) && !rd->no_buf_add) { if (rd->buf_ring) { br = io_uring_setup_buf_ring(&ring, 1, BUF_BGID, 0, &ret); if (!br) { no_pbuf_ring = 1; goto out; } io_uring_buf_ring_add(br, buf, sizeof(buf), BUF_BID, io_uring_buf_ring_mask(1), 0); io_uring_buf_ring_advance(br, 1); } else { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; sqe = io_uring_get_sqe(&ring); io_uring_prep_provide_buffers(sqe, buf, sizeof(buf) -1, 1, BUF_BGID, BUF_BID); sqe->user_data = ++ud; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit ret=%d\n", ret); goto err; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(&ring, cqe); if (ret == -EINVAL) { fprintf(stdout, "PROVIDE_BUFFERS not supported, skip\n"); goto out; } else if (ret < 0) { fprintf(stderr, "PROVIDER_BUFFERS %d\n", ret); goto err; } } } ret = recv_prep(&ring, &sockfd, iov, rd->iov_count, (rd->buf_ring || rd->buf_select) ? BUF_BGID : 0, rd->async); if (ret) { fprintf(stderr, "recv_prep failed: %d\n", ret); goto err; } pthread_mutex_unlock(mutex); ret = do_recvmsg(&ring, buf, rd); close(sockfd); if (br) io_uring_free_buf_ring(&ring, br, 1, BUF_BGID); io_uring_queue_exit(&ring); err: return (void *)(intptr_t)ret; out: io_uring_queue_exit(&ring); out_no_ring: pthread_mutex_unlock(mutex); if (br) io_uring_free_buf_ring(&ring, br, 1, BUF_BGID); return NULL; } static int do_sendmsg(void) { struct sockaddr_in saddr; struct iovec iov = { .iov_base = str, .iov_len = sizeof(str), }; struct msghdr msg; struct io_uring ring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int sockfd, ret; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return 1; } memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_port = htons(PORT); inet_pton(AF_INET, HOST, &saddr.sin_addr); memset(&msg, 0, sizeof(msg)); msg.msg_name = &saddr; msg.msg_namelen = sizeof(struct sockaddr_in); msg.msg_iov = &iov; msg.msg_iovlen = 1; sockfd = socket(AF_INET, SOCK_DGRAM, 0); if (sockfd < 0) { perror("socket"); return 1; } usleep(10000); sqe = io_uring_get_sqe(&ring); io_uring_prep_sendmsg(sqe, sockfd, &msg, 0); sqe->user_data = ++ud; ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(&ring, &cqe); if (cqe->res < 0) { fprintf(stderr, "%s: failed cqe: %d\n", __FUNCTION__, cqe->res); goto err; } close(sockfd); return 0; err: close(sockfd); return 1; } static int test(int buf_select, int buf_ring, int no_buf_add, int iov_count, int async) { struct recv_data rd; pthread_mutexattr_t attr; pthread_t recv_thread; pthread_mutex_t mutex; int ret; void *retval; if (buf_select || buf_ring) assert(iov_count == 1); pthread_mutexattr_init(&attr); pthread_mutexattr_setpshared(&attr, 1); pthread_mutex_init(&mutex, &attr); pthread_mutex_lock(&mutex); rd.mutex = &mutex; rd.buf_select = buf_select; rd.buf_ring = 
buf_ring; rd.no_buf_add = no_buf_add; rd.iov_count = iov_count; rd.async = async; ret = pthread_create(&recv_thread, NULL, recv_fn, &rd); if (ret) { pthread_mutex_unlock(&mutex); fprintf(stderr, "Thread create failed\n"); return 1; } pthread_mutex_lock(&mutex); do_sendmsg(); pthread_join(recv_thread, &retval); ret = (intptr_t)retval; return ret; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test(0, 0, 0, 1, 0); if (ret) { fprintf(stderr, "send_recvmsg 0 0 0 1 0 failed\n"); return 1; } ret = test(0, 0, 0, 10, 0); if (ret) { fprintf(stderr, "send_recvmsg multi iov failed\n"); return 1; } ret = test(1, 0, 0, 1, 0); if (ret) { fprintf(stderr, "send_recvmsg 1 0 0 1 0 failed\n"); return 1; } ret = test(1, 0, 1, 1, 0); if (ret) { fprintf(stderr, "send_recvmsg 1 0 1 1 0 failed\n"); return 1; } ret = test(0, 1, 0, 1, 0); if (ret) { fprintf(stderr, "send_recvmsg 0 1 0 1 0 failed\n"); return 1; } ret = test(1, 1, 0, 1, 0); if (ret) { fprintf(stderr, "send_recvmsg 1 1 0 1 0 failed\n"); return 1; } ret = test(1, 1, 1, 1, 0); if (ret) { fprintf(stderr, "send_recvmsg 1 1 1 1 0 failed\n"); return 1; } ret = test(0, 0, 0, 1, 1); if (ret) { fprintf(stderr, "send_recvmsg async 0 0 0 1 1 failed\n"); return 1; } ret = test(0, 0, 0, 10, 1); if (ret) { fprintf(stderr, "send_recvmsg async multi iov failed\n"); return 1; } ret = test(1, 0, 0, 1, 1); if (ret) { fprintf(stderr, "send_recvmsg async 1 0 0 1 1 failed\n"); return 1; } ret = test(1, 0, 1, 1, 1); if (ret) { fprintf(stderr, "send_recvmsg async 1 0 1 1 1 failed\n"); return 1; } ret = test(0, 1, 0, 1, 1); if (ret) { fprintf(stderr, "send_recvmsg async 0 1 0 1 1 failed\n"); return 1; } ret = test(1, 1, 0, 1, 1); if (ret) { fprintf(stderr, "send_recvmsg async 1 1 0 1 1 failed\n"); return 1; } ret = test(1, 1, 1, 1, 1); if (ret) { fprintf(stderr, "send_recvmsg async 1 1 1 1 1 failed\n"); return 1; } return 0; } liburing-2.6/test/shared-wq.c000066400000000000000000000027631461424365000162270ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test wq sharing */ #include #include #include #include #include #include #include "liburing.h" static int test_attach_invalid(int ringfd) { struct io_uring_params p; struct io_uring ring; int ret; memset(&p, 0, sizeof(p)); p.flags = IORING_SETUP_ATTACH_WQ; p.wq_fd = ringfd; ret = io_uring_queue_init_params(1, &ring, &p); if (ret != -EINVAL) { fprintf(stderr, "Attach to zero: %d\n", ret); goto err; } return 0; err: return 1; } static int test_attach(int ringfd) { struct io_uring_params p; struct io_uring ring2; int ret; memset(&p, 0, sizeof(p)); p.flags = IORING_SETUP_ATTACH_WQ; p.wq_fd = ringfd; ret = io_uring_queue_init_params(1, &ring2, &p); if (ret == -EINVAL) { fprintf(stdout, "Sharing not supported, skipping\n"); return 0; } else if (ret) { fprintf(stderr, "Attach to id: %d\n", ret); goto err; } io_uring_queue_exit(&ring2); return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return 0; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } /* stdout is definitely not an io_uring descriptor */ ret = test_attach_invalid(2); if (ret) { fprintf(stderr, "test_attach_invalid failed\n"); return ret; } ret = test_attach(ring.ring_fd); if (ret) { fprintf(stderr, "test_attach failed\n"); return ret; } return 0; } liburing-2.6/test/short-read.c000066400000000000000000000025421461424365000163770ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include 
#include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define BUF_SIZE 4096 #define FILE_SIZE 1024 int main(int argc, char *argv[]) { int ret, fd, save_errno; struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct iovec vec; if (argc > 1) return 0; vec.iov_base = t_malloc(BUF_SIZE); vec.iov_len = BUF_SIZE; t_create_file(".short-read", FILE_SIZE); fd = open(".short-read", O_RDONLY); save_errno = errno; unlink(".short-read"); errno = save_errno; if (fd < 0) { perror("file open"); return 1; } ret = io_uring_queue_init(32, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return ret; } sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); return 1; } io_uring_prep_readv(sqe, fd, &vec, 1, 0); ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit failed: %d\n", ret); return 1; } ret = io_uring_wait_cqes(&ring, &cqe, 1, 0, 0); if (ret) { fprintf(stderr, "wait_cqe failed: %d\n", ret); return 1; } if (cqe->res != FILE_SIZE) { fprintf(stderr, "Read failed: %d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); return 0; } liburing-2.6/test/shutdown.c000066400000000000000000000064501461424365000162040ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Check that writev on a socket that has been shutdown(2) fails * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static void sig_pipe(int sig) { } int main(int argc, char *argv[]) { int p_fd[2], ret; int32_t recv_s0; int32_t val = 1; struct sockaddr_in addr = { }; if (argc > 1) return 0; srand(getpid()); recv_s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); ret = setsockopt(recv_s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); assert(ret != -1); ret = setsockopt(recv_s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); assert(ret != -1); addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); ret = t_bind_ephemeral_port(recv_s0, &addr); assert(!ret); ret = listen(recv_s0, 128); assert(ret != -1); p_fd[1] = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); val = 1; ret = setsockopt(p_fd[1], IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); assert(ret != -1); int32_t flags = fcntl(p_fd[1], F_GETFL, 0); assert(flags != -1); flags |= O_NONBLOCK; ret = fcntl(p_fd[1], F_SETFL, flags); assert(ret != -1); ret = connect(p_fd[1], (struct sockaddr*)&addr, sizeof(addr)); assert(ret == -1); flags = fcntl(p_fd[1], F_GETFL, 0); assert(flags != -1); flags &= ~O_NONBLOCK; ret = fcntl(p_fd[1], F_SETFL, flags); assert(ret != -1); p_fd[0] = accept(recv_s0, NULL, NULL); assert(p_fd[0] != -1); signal(SIGPIPE, sig_pipe); while (1) { int32_t code; socklen_t code_len = sizeof(code); ret = getsockopt(p_fd[1], SOL_SOCKET, SO_ERROR, &code, &code_len); assert(ret != -1); if (!code) break; } struct io_uring m_io_uring; ret = io_uring_queue_init(32, &m_io_uring, 0); assert(ret >= 0); { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int res; sqe = io_uring_get_sqe(&m_io_uring); io_uring_prep_shutdown(sqe, p_fd[1], SHUT_WR); sqe->user_data = 1; res = io_uring_submit_and_wait(&m_io_uring, 1); assert(res != -1); res = io_uring_wait_cqe(&m_io_uring, &cqe); if (res < 0) { fprintf(stderr, "wait: %s\n", strerror(-ret)); goto err; } if (cqe->res) { if (cqe->res == -EINVAL) { fprintf(stdout, "Shutdown not supported, skipping\n"); goto done; } fprintf(stderr, "writev: %d\n", 
cqe->res); goto err; } io_uring_cqe_seen(&m_io_uring, cqe); } { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct iovec iov[1]; char send_buff[128]; int res; iov[0].iov_base = send_buff; iov[0].iov_len = sizeof(send_buff); sqe = io_uring_get_sqe(&m_io_uring); assert(sqe != NULL); io_uring_prep_writev(sqe, p_fd[1], iov, 1, 0); res = io_uring_submit_and_wait(&m_io_uring, 1); assert(res != -1); res = io_uring_wait_cqe(&m_io_uring, &cqe); if (res < 0) { fprintf(stderr, "wait: %s\n", strerror(-ret)); goto err; } if (cqe->res != -EPIPE) { fprintf(stderr, "writev: %d\n", cqe->res); goto err; } io_uring_cqe_seen(&m_io_uring, cqe); } done: io_uring_queue_exit(&m_io_uring); return 0; err: io_uring_queue_exit(&m_io_uring); return 1; } liburing-2.6/test/sigfd-deadlock.c000066400000000000000000000033311461424365000171640ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test that sigfd reading/polling works. A regression test for * the upstream commit: * * fd7d6de22414 ("io_uring: don't recurse on tsk->sighand->siglock with signalfd") */ #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int setup_signal(void) { sigset_t mask; int sfd; sigemptyset(&mask); sigaddset(&mask, SIGINT); sigprocmask(SIG_BLOCK, &mask, NULL); sfd = signalfd(-1, &mask, SFD_NONBLOCK); if (sfd < 0) perror("signalfd"); return sfd; } static int test_uring(int sfd) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct io_uring ring; int ret; ret = io_uring_queue_init(32, &ring, 0); if (ret) return T_EXIT_FAIL; sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, sfd, POLLIN); ret = io_uring_submit(&ring); if (ret < 0) { ret = T_EXIT_FAIL; goto err_exit; } kill(getpid(), SIGINT); io_uring_wait_cqe(&ring, &cqe); if (cqe->res == -EOPNOTSUPP) { fprintf(stderr, "signalfd poll not supported\n"); ret = T_EXIT_SKIP; } else if (cqe->res < 0) { fprintf(stderr, "poll failed: %d\n", cqe->res); ret = T_EXIT_FAIL; } else if (cqe->res & POLLIN) { ret = T_EXIT_PASS; } else { fprintf(stderr, "Unexpected poll mask %x\n", cqe->res); ret = T_EXIT_FAIL; } io_uring_cqe_seen(&ring, cqe); err_exit: io_uring_queue_exit(&ring); return ret; } int main(int argc, char *argv[]) { int sfd, ret; if (argc > 1) return T_EXIT_PASS; sfd = setup_signal(); if (sfd < 0) return T_EXIT_FAIL; ret = test_uring(sfd); if (ret == T_EXIT_FAIL) fprintf(stderr, "test_uring signalfd failed\n"); close(sfd); return ret; } liburing-2.6/test/single-issuer.c000066400000000000000000000073401461424365000171210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include "liburing.h" #include "test.h" #include "helpers.h" static pid_t pid; static pid_t fork_t(void) { pid = fork(); if (pid == -1) { fprintf(stderr, "fork failed\n"); exit(T_EXIT_FAIL); } return pid; } static void wait_child_t(void) { int wstatus; if (waitpid(pid, &wstatus, 0) == (pid_t)-1) { perror("waitpid()"); exit(T_EXIT_FAIL); } if (!WIFEXITED(wstatus)) { fprintf(stderr, "child failed %i\n", WEXITSTATUS(wstatus)); exit(T_EXIT_FAIL); } if (WEXITSTATUS(wstatus)) exit(T_EXIT_FAIL); } static int try_submit(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); sqe->user_data = 42; ret = io_uring_submit(ring); if (ret < 0) return ret; if (ret != 1) t_error(1, ret, "submit %i", ret); ret = io_uring_wait_cqe(ring, &cqe); if (ret) t_error(1, ret, "wait fail %i", ret); if 
(cqe->res || cqe->user_data != 42) t_error(1, ret, "invalid cqe"); io_uring_cqe_seen(ring, cqe); return 0; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER); if (ret == -EINVAL) { return T_EXIT_SKIP; } else if (ret) { fprintf(stderr, "io_uring_queue_init() failed %i\n", ret); return T_EXIT_FAIL; } /* test that the creator iw allowed to submit */ ret = try_submit(&ring); if (ret) { fprintf(stderr, "the creator can't submit %i\n", ret); return T_EXIT_FAIL; } /* test that a second submitter doesn't succeed */ if (!fork_t()) { ret = try_submit(&ring); if (ret != -EEXIST) fprintf(stderr, "1: not owner child could submit %i\n", ret); return ret != -EEXIST; } wait_child_t(); io_uring_queue_exit(&ring); /* test that the first submitter but not creator can submit */ ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_R_DISABLED); if (ret) t_error(1, ret, "ring init (2) %i", ret); if (!fork_t()) { io_uring_enable_rings(&ring); ret = try_submit(&ring); if (ret) fprintf(stderr, "2: not owner child could submit %i\n", ret); return !!ret; } wait_child_t(); io_uring_queue_exit(&ring); /* test that only the first enabler can submit */ ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_R_DISABLED); if (ret) t_error(1, ret, "ring init (3) %i", ret); io_uring_enable_rings(&ring); if (!fork_t()) { ret = try_submit(&ring); if (ret != -EEXIST) fprintf(stderr, "3: not owner child could submit %i\n", ret); return ret != -EEXIST; } wait_child_t(); io_uring_queue_exit(&ring); /* test that anyone can submit to a SQPOLL|SINGLE_ISSUER ring */ ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER|IORING_SETUP_SQPOLL); if (ret) t_error(1, ret, "ring init (4) %i", ret); ret = try_submit(&ring); if (ret) { fprintf(stderr, "SQPOLL submit failed (creator) %i\n", ret); return T_EXIT_FAIL; } if (!fork_t()) { ret = try_submit(&ring); if (ret) fprintf(stderr, "SQPOLL submit failed (child) %i\n", ret); return !!ret; } wait_child_t(); io_uring_queue_exit(&ring); /* test that IORING_ENTER_REGISTERED_RING doesn't break anything */ ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER); if (ret) t_error(1, ret, "ring init (5) %i", ret); if (!fork_t()) { ret = try_submit(&ring); if (ret != -EEXIST) fprintf(stderr, "4: not owner child could submit %i\n", ret); return ret != -EEXIST; } wait_child_t(); io_uring_queue_exit(&ring); return T_EXIT_PASS; } liburing-2.6/test/skip-cqe.c000066400000000000000000000226131461424365000160440ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define LINK_SIZE 6 #define TIMEOUT_USER_DATA (-1) static int fds[2]; /* should be successfully submitted but fails during execution */ static void prep_exec_fail_req(struct io_uring_sqe *sqe) { io_uring_prep_write(sqe, fds[1], NULL, 100, 0); } static int test_link_success(struct io_uring *ring, int nr, bool skip_last) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; for (i = 0; i < nr; ++i) { sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); if (i != nr - 1 || skip_last) sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS; sqe->user_data = i; } ret = io_uring_submit(ring); if (ret != nr) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } if (!skip_last) { ret = io_uring_wait_cqe(ring, &cqe); if (ret != 0) { fprintf(stderr, 
"wait completion %d\n", ret); goto err; } if (cqe->res != 0) { fprintf(stderr, "nop failed: res %d\n", cqe->res); goto err; } if (cqe->user_data != nr - 1) { fprintf(stderr, "invalid user_data %i\n", (int)cqe->user_data); goto err; } io_uring_cqe_seen(ring, cqe); } if (io_uring_peek_cqe(ring, &cqe) >= 0) { fprintf(stderr, "single CQE expected %i\n", (int)cqe->user_data); goto err; } return 0; err: return 1; } static int test_link_fail(struct io_uring *ring, int nr, int fail_idx) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; for (i = 0; i < nr; ++i) { sqe = io_uring_get_sqe(ring); if (i == fail_idx) prep_exec_fail_req(sqe); else io_uring_prep_nop(sqe); if (i != nr - 1) sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS; sqe->user_data = i; } ret = io_uring_submit(ring); if (ret != nr) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret != 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (!cqe->res || cqe->user_data != fail_idx) { fprintf(stderr, "got: user_data %d res %d, expected data: %d\n", (int)cqe->user_data, cqe->res, fail_idx); goto err; } io_uring_cqe_seen(ring, cqe); if (io_uring_peek_cqe(ring, &cqe) >= 0) { fprintf(stderr, "single CQE expected %i\n", (int)cqe->user_data); goto err; } return 0; err: return 1; } static int test_ltimeout_cancel(struct io_uring *ring, int nr, int tout_idx, bool async, int fail_idx) { struct __kernel_timespec ts = {.tv_sec = 1, .tv_nsec = 0}; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; int e_res = 0, e_idx = nr - 1; if (fail_idx >= 0) { e_res = -EFAULT; e_idx = fail_idx; } for (i = 0; i < nr; ++i) { sqe = io_uring_get_sqe(ring); if (i == fail_idx) prep_exec_fail_req(sqe); else io_uring_prep_nop(sqe); sqe->user_data = i; sqe->flags |= IOSQE_IO_LINK; if (async) sqe->flags |= IOSQE_ASYNC; if (i != nr - 1) sqe->flags |= IOSQE_CQE_SKIP_SUCCESS; if (i == tout_idx) { sqe = io_uring_get_sqe(ring); io_uring_prep_link_timeout(sqe, &ts, 0); sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS; sqe->user_data = TIMEOUT_USER_DATA; } } ret = io_uring_submit(ring); if (ret != nr + 1) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret != 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->user_data != e_idx) { fprintf(stderr, "invalid user_data %i\n", (int)cqe->user_data); goto err; } if (cqe->res != e_res) { fprintf(stderr, "unexpected res: %d\n", cqe->res); goto err; } io_uring_cqe_seen(ring, cqe); if (io_uring_peek_cqe(ring, &cqe) >= 0) { fprintf(stderr, "single CQE expected %i\n", (int)cqe->user_data); goto err; } return 0; err: return 1; } static int test_ltimeout_fire(struct io_uring *ring, bool async, bool skip_main, bool skip_tout) { char buf[1]; struct __kernel_timespec ts = {.tv_sec = 0, .tv_nsec = 1000000}; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; int nr = 1 + !skip_tout; sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); sqe->flags |= IOSQE_IO_LINK; sqe->flags |= async ? IOSQE_ASYNC : 0; sqe->flags |= skip_main ? IOSQE_CQE_SKIP_SUCCESS : 0; sqe->user_data = 0; sqe = io_uring_get_sqe(ring); io_uring_prep_link_timeout(sqe, &ts, 0); sqe->flags |= skip_tout ? 
IOSQE_CQE_SKIP_SUCCESS : 0; sqe->user_data = 1; ret = io_uring_submit(ring); if (ret != 2) { fprintf(stderr, "sqe submit failed: %d\n", ret); return 1; } for (i = 0; i < nr; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret != 0) { fprintf(stderr, "wait completion %d\n", ret); return 1; } switch (cqe->user_data) { case 0: if (cqe->res != -ECANCELED && cqe->res != -EINTR) { fprintf(stderr, "unexpected read return: %d\n", cqe->res); return 1; } break; case 1: if (skip_tout) { fprintf(stderr, "extra timeout cqe, %d\n", cqe->res); return 1; } break; } io_uring_cqe_seen(ring, cqe); } if (io_uring_peek_cqe(ring, &cqe) >= 0) { fprintf(stderr, "single CQE expected: got data: %i res: %i\n", (int)cqe->user_data, cqe->res); return 1; } return 0; } static int test_hardlink(struct io_uring *ring, int nr, int fail_idx, int skip_idx, bool hardlink_last) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret, i; assert(fail_idx < nr); assert(skip_idx < nr); for (i = 0; i < nr; i++) { sqe = io_uring_get_sqe(ring); if (i == fail_idx) prep_exec_fail_req(sqe); else io_uring_prep_nop(sqe); if (i != nr - 1 || hardlink_last) sqe->flags |= IOSQE_IO_HARDLINK; if (i == skip_idx) sqe->flags |= IOSQE_CQE_SKIP_SUCCESS; sqe->user_data = i; } ret = io_uring_submit(ring); if (ret != nr) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } for (i = 0; i < nr; i++) { if (i == skip_idx && fail_idx != skip_idx) continue; ret = io_uring_wait_cqe(ring, &cqe); if (ret != 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->user_data != i) { fprintf(stderr, "invalid user_data %d (%i)\n", (int)cqe->user_data, i); goto err; } if (i == fail_idx) { if (cqe->res >= 0) { fprintf(stderr, "req should've failed %d %d\n", (int)cqe->user_data, cqe->res); goto err; } } else { if (cqe->res) { fprintf(stderr, "req error %d %d\n", (int)cqe->user_data, cqe->res); goto err; } } io_uring_cqe_seen(ring, cqe); } if (io_uring_peek_cqe(ring, &cqe) >= 0) { fprintf(stderr, "single CQE expected %i\n", (int)cqe->user_data); goto err; } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; int ret, i, j, k; int mid_idx = LINK_SIZE / 2; int last_idx = LINK_SIZE - 1; if (argc > 1) return 0; if (pipe(fds)) { fprintf(stderr, "pipe() failed\n"); return 1; } ret = io_uring_queue_init(16, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } if (!(ring.features & IORING_FEAT_CQE_SKIP)) return T_EXIT_SKIP; for (i = 0; i < 4; i++) { bool skip_last = i & 1; int sz = (i & 2) ? 
LINK_SIZE : 1; ret = test_link_success(&ring, sz, skip_last); if (ret) { fprintf(stderr, "test_link_success sz %d, %d last\n", skip_last, sz); return ret; } } ret = test_link_fail(&ring, LINK_SIZE, mid_idx); if (ret) { fprintf(stderr, "test_link_fail mid failed\n"); return ret; } ret = test_link_fail(&ring, LINK_SIZE, last_idx); if (ret) { fprintf(stderr, "test_link_fail last failed\n"); return ret; } for (i = 0; i < 2; i++) { bool async = i & 1; ret = test_ltimeout_cancel(&ring, 1, 0, async, -1); if (ret) { fprintf(stderr, "test_ltimeout_cancel 1 failed, %i\n", async); return ret; } ret = test_ltimeout_cancel(&ring, LINK_SIZE, mid_idx, async, -1); if (ret) { fprintf(stderr, "test_ltimeout_cancel mid failed, %i\n", async); return ret; } ret = test_ltimeout_cancel(&ring, LINK_SIZE, last_idx, async, -1); if (ret) { fprintf(stderr, "test_ltimeout_cancel last failed, %i\n", async); return ret; } ret = test_ltimeout_cancel(&ring, LINK_SIZE, mid_idx, async, mid_idx); if (ret) { fprintf(stderr, "test_ltimeout_cancel fail mid failed, %i\n", async); return ret; } ret = test_ltimeout_cancel(&ring, LINK_SIZE, mid_idx, async, mid_idx - 1); if (ret) { fprintf(stderr, "test_ltimeout_cancel fail2 mid failed, %i\n", async); return ret; } ret = test_ltimeout_cancel(&ring, LINK_SIZE, mid_idx, async, mid_idx + 1); if (ret) { fprintf(stderr, "test_ltimeout_cancel fail3 mid failed, %i\n", async); return ret; } } for (i = 0; i < 8; i++) { bool async = i & 1; bool skip1 = i & 2; bool skip2 = i & 4; ret = test_ltimeout_fire(&ring, async, skip1, skip2); if (ret) { fprintf(stderr, "test_ltimeout_fire failed\n"); return ret; } } /* test 3 positions, start/middle/end of the link, i.e. indexes 0, 3, 6 */ for (i = 0; i < 3; i++) { for (j = 0; j < 3; j++) { for (k = 0; k < 2; k++) { bool mark_last = k & 1; ret = test_hardlink(&ring, 7, i * 3, j * 3, mark_last); if (ret) { fprintf(stderr, "test_hardlink failed" "fail %i skip %i mark last %i\n", i * 3, j * 3, k); return 1; } } } } close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring); return 0; } liburing-2.6/test/socket-getsetsock-cmd.c000066400000000000000000000165051461424365000205350ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Check that {g,s}etsockopt CMD operations on sockets are * consistent. * * The tests basically do the same socket operation using regular system calls * and io_uring commands, and then compare the results. 
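 *
 * Any mismatch between the two answers points at the io_uring
 * SOCKET_URING_OP_GETSOCKOPT / SOCKET_URING_OP_SETSOCKOPT plumbing
 * rather than at the underlying socket layer.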
*/ #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define USERDATA 0xff42ff #define MSG "foobarbaz" static int no_sock_opt; struct fds { int tx; int rx; }; static struct fds create_sockets(void) { struct fds retval; int fd[2]; t_create_socket_pair(fd, true); retval.tx = fd[0]; retval.rx = fd[1]; return retval; } static struct io_uring create_ring(void) { struct io_uring ring; int ring_flags = 0; int err; err = io_uring_queue_init(32, &ring, ring_flags); assert(err == 0); return ring; } static int submit_cmd_sqe(struct io_uring *ring, int32_t fd, int op, int level, int optname, void *optval, int optlen) { struct io_uring_sqe *sqe; int err; assert(fd > 0); sqe = io_uring_get_sqe(ring); assert(sqe != NULL); io_uring_prep_cmd_sock(sqe, op, fd, level, optname, optval, optlen); sqe->user_data = USERDATA; /* Submitting SQE */ err = io_uring_submit_and_wait(ring, 1); if (err != 1) fprintf(stderr, "Failure: io_uring_submit_and_wait returned %d\n", err); return err; } static int receive_cqe(struct io_uring *ring) { struct io_uring_cqe *cqe; int err; err = io_uring_wait_cqe(ring, &cqe); assert(err == 0); assert(cqe->user_data == USERDATA); io_uring_cqe_seen(ring, cqe); /* Return the result of the operation */ return cqe->res; } /* * Run getsock operation using SO_RCVBUF using io_uring cmd operation and * getsockopt(2) and compare the results. */ static int run_get_rcvbuf(struct io_uring *ring, struct fds *sockfds) { int sval, uval, ulen, err; unsigned int slen; /* System call values */ slen = sizeof(sval); /* io_uring values */ ulen = sizeof(uval); /* get through io_uring cmd */ err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_GETSOCKOPT, SOL_SOCKET, SO_RCVBUF, &uval, ulen); assert(err == 1); /* Wait for the CQE */ err = receive_cqe(ring); if (err == -EOPNOTSUPP) return T_EXIT_SKIP; if (err < 0) { fprintf(stderr, "Error received. %d\n", err); return T_EXIT_FAIL; } /* The output of CQE->res contains the length */ ulen = err; /* Executes the same operation using system call */ err = getsockopt(sockfds->rx, SOL_SOCKET, SO_RCVBUF, &sval, &slen); assert(err == 0); /* Make sure that io_uring operation returns the same value as the systemcall */ assert(ulen == slen); assert(uval == sval); return T_EXIT_PASS; } /* * Run getsock operation using SO_PEERNAME using io_uring cmd operation * and getsockopt(2) and compare the results. */ static int run_get_peername(struct io_uring *ring, struct fds *sockfds) { struct sockaddr sval, uval = {}; socklen_t slen = sizeof(sval); socklen_t ulen = sizeof(uval); int err; /* Get values from the systemcall */ err = getsockopt(sockfds->tx, SOL_SOCKET, SO_PEERNAME, &sval, &slen); assert(err == 0); /* Getting SO_PEERNAME */ err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_GETSOCKOPT, SOL_SOCKET, SO_PEERNAME, &uval, ulen); assert(err == 1); /* Wait for the CQE */ err = receive_cqe(ring); if (err == -EOPNOTSUPP || err == -EINVAL) { no_sock_opt = 1; return T_EXIT_SKIP; } if (err < 0) { fprintf(stderr, "%s: Error in the CQE: %d\n", __func__, err); return T_EXIT_FAIL; } /* The length comes from cqe->res, which is returned from receive_cqe() */ ulen = err; /* Make sure that io_uring operation returns the same values as the systemcall */ assert(sval.sa_family == uval.sa_family); assert(slen == ulen); return T_EXIT_PASS; } /* * Run getsockopt tests. 
Basically comparing io_uring output and systemcall results */ static int run_getsockopt_test(struct io_uring *ring, struct fds *sockfds) { int err; fprintf(stderr, "Testing getsockopt SO_PEERNAME\n"); err = run_get_peername(ring, sockfds); if (err) return err; fprintf(stderr, "Testing getsockopt SO_RCVBUF\n"); return run_get_rcvbuf(ring, sockfds); } /* * Given a `val` value, set it in SO_REUSEPORT using io_uring cmd, and read using * getsockopt(2), and make sure they match. */ static int run_setsockopt_reuseport(struct io_uring *ring, struct fds *sockfds, int val) { unsigned int slen, ulen; int sval, uval = val; int err; slen = sizeof(sval); ulen = sizeof(uval); /* Setting SO_REUSEPORT */ err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_SETSOCKOPT, SOL_SOCKET, SO_REUSEPORT, &uval, ulen); assert(err == 1); err = receive_cqe(ring); if (err == -EOPNOTSUPP) return T_EXIT_SKIP; /* Get values from the systemcall */ err = getsockopt(sockfds->rx, SOL_SOCKET, SO_REUSEPORT, &sval, &slen); assert(err == 0); /* Make sure the set using io_uring cmd matches what systemcall returns */ assert(uval == sval); assert(ulen == slen); return T_EXIT_PASS; } /* * Given a `val` value, set the TCP_USER_TIMEOUT using io_uring and read using * getsockopt(2). Make sure they match */ static int run_setsockopt_usertimeout(struct io_uring *ring, struct fds *sockfds, int val) { int optname = TCP_USER_TIMEOUT; int level = IPPROTO_TCP; unsigned int slen, ulen; int sval, uval, err; slen = sizeof(uval); ulen = sizeof(uval); uval = val; /* Setting timeout */ err = submit_cmd_sqe(ring, sockfds->rx, SOCKET_URING_OP_SETSOCKOPT, level, optname, &uval, ulen); assert(err == 1); err = receive_cqe(ring); if (err == -EOPNOTSUPP) return T_EXIT_SKIP; if (err < 0) { fprintf(stderr, "%s: Got an error: %d\n", __func__, err); return T_EXIT_FAIL; } /* Get the value from the systemcall, to make sure it was set */ err = getsockopt(sockfds->rx, level, optname, &sval, &slen); assert(err == 0); assert(uval == sval); return T_EXIT_PASS; } /* Test setsockopt() for SOL_SOCKET */ static int run_setsockopt_test(struct io_uring *ring, struct fds *sockfds) { int err, i; fprintf(stderr, "Testing setsockopt SOL_SOCKET/SO_REUSEPORT\n"); for (i = 0; i <= 1; i++) { err = run_setsockopt_reuseport(ring, sockfds, i); if (err) return err; } fprintf(stderr, "Testing setsockopt IPPROTO_TCP/TCP_FASTOPEN\n"); for (i = 1; i <= 10; i++) { err = run_setsockopt_usertimeout(ring, sockfds, i); if (err) return err; } return err; } /* Send data through the sockets */ static void send_data(struct fds *s) { int written_bytes; /* Send data sing the sockstruct->send */ written_bytes = write(s->tx, MSG, strlen(MSG)); assert(written_bytes == strlen(MSG)); } int main(int argc, char *argv[]) { struct fds sockfds; struct io_uring ring; int err; if (argc > 1) return T_EXIT_SKIP; /* Simply io_uring ring creation */ ring = create_ring(); /* Create sockets */ sockfds = create_sockets(); send_data(&sockfds); err = run_getsockopt_test(&ring, &sockfds); if (err) { if (err == T_EXIT_SKIP) { fprintf(stderr, "Skipping tests.\n"); return T_EXIT_SKIP; } fprintf(stderr, "Failed to run test: %d\n", err); return err; } if (no_sock_opt) return T_EXIT_SKIP; err = run_setsockopt_test(&ring, &sockfds); if (err) { if (err == T_EXIT_SKIP) { fprintf(stderr, "Skipping tests.\n"); return T_EXIT_SKIP; } fprintf(stderr, "Failed to run test: %d\n", err); return err; } io_uring_queue_exit(&ring); return err; } 
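/*
 * Illustrative sketch, not part of the original archive: the minimal
 * shape of the pattern the test above exercises. A getsockopt(2)
 * equivalent is issued through io_uring_prep_cmd_sock(), and on success
 * cqe->res carries the length of the returned option value. The helper
 * name is made up for this example and error handling is trimmed.
 */
static int example_get_rcvbuf(struct io_uring *ring, int sockfd, int *val)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	io_uring_prep_cmd_sock(sqe, SOCKET_URING_OP_GETSOCKOPT, sockfd,
			       SOL_SOCKET, SO_RCVBUF, val, sizeof(*val));
	ret = io_uring_submit_and_wait(ring, 1);
	if (ret != 1)
		return ret < 0 ? ret : -1;
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret)
		return ret;
	/* cqe->res is the option length on success, -errno on failure */
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret;
}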
liburing-2.6/test/socket-io-cmd.c000066400000000000000000000116341461424365000167670ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Check that CMD operations on sockets are consistent. */ #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define USERDATA 0x1234 #define MSG "foobarbaz" static int no_io_cmd; struct fds { int tx; int rx; }; /* Create 2 sockets (tx, rx) given the socket type */ static struct fds create_sockets(bool stream) { struct fds retval; int fd[2]; t_create_socket_pair(fd, stream); retval.tx = fd[0]; retval.rx = fd[1]; return retval; } static int create_sqe_and_submit(struct io_uring *ring, int32_t fd, int op) { struct io_uring_sqe *sqe; int ret; assert(fd > 0); sqe = io_uring_get_sqe(ring); assert(sqe != NULL); io_uring_prep_cmd_sock(sqe, op, fd, 0, 0, NULL, 0); sqe->user_data = USERDATA; /* Submitting SQE */ ret = io_uring_submit_and_wait(ring, 1); if (ret <= 0) return ret; return 0; } static int receive_cqe(struct io_uring *ring) { struct io_uring_cqe *cqe; int err; err = io_uring_wait_cqe(ring, &cqe); assert(err == 0); assert(cqe->user_data == USERDATA); err = cqe->res; io_uring_cqe_seen(ring, cqe); /* Return the result of the operation */ return err; } static ssize_t send_data(struct fds *s, char *str) { size_t written_bytes; written_bytes = write(s->tx, str, strlen(str)); assert(written_bytes == strlen(MSG)); return written_bytes; } static int run_test(bool stream) { struct fds sockfds; ssize_t bytes_in, bytes_out; struct io_uring ring; size_t written_bytes; int error; /* Create three sockets */ sockfds = create_sockets(stream); assert(sockfds.tx > 0); assert(sockfds.rx > 0); /* Send data sing the sockfds->send */ written_bytes = send_data(&sockfds, MSG); /* Simply io_uring ring creation */ error = t_create_ring(1, &ring, 0); if (error == T_SETUP_SKIP) return error; else if (error != T_SETUP_OK) return T_EXIT_FAIL; error = create_sqe_and_submit(&ring, sockfds.rx, SOCKET_URING_OP_SIOCINQ); if (error) return T_EXIT_FAIL; bytes_in = receive_cqe(&ring); if (bytes_in < 0) { if (bytes_in == -EINVAL || bytes_in == -EOPNOTSUPP) { no_io_cmd = 1; return T_EXIT_SKIP; } fprintf(stderr, "Bad return value %ld\n", (long) bytes_in); return T_EXIT_FAIL; } error = create_sqe_and_submit(&ring, sockfds.tx, SOCKET_URING_OP_SIOCOUTQ); if (error) return T_EXIT_FAIL; bytes_out = receive_cqe(&ring); if (bytes_in == -ENOTSUP || bytes_out == -ENOTSUP) { fprintf(stderr, "Skipping tests. 
-ENOTSUP returned\n"); return T_EXIT_SKIP; } /* * Assert the number of written bytes are either in the socket buffer * or on the receive side */ if (bytes_in + bytes_out != written_bytes) { fprintf(stderr, "values does not match: %zu+%zu != %zu\n", bytes_in, bytes_out, written_bytes); return T_EXIT_FAIL; } io_uring_queue_exit(&ring); return T_EXIT_PASS; } /* * Make sure that siocoutq and siocinq returns the same value * using ioctl(2) and uring commands for raw sockets */ static int run_test_raw(void) { int ioctl_siocoutq, ioctl_siocinq; int uring_siocoutq, uring_siocinq; struct io_uring ring; int retry = 0, sock, error; sock = socket(PF_INET, SOCK_RAW, IPPROTO_TCP); if (sock == -1) { /* You need root to create raw socket */ perror("Not able to create a raw socket"); return T_EXIT_SKIP; } /* Get the same operation using uring cmd */ error = t_create_ring(1, &ring, 0); if (error == T_SETUP_SKIP) return error; else if (error != T_SETUP_OK) return T_EXIT_FAIL; again: /* Simple SIOCOUTQ using ioctl */ error = ioctl(sock, SIOCOUTQ, &ioctl_siocoutq); if (error < 0) { fprintf(stderr, "Failed to run ioctl(SIOCOUTQ): %d\n", error); return T_EXIT_FAIL; } error = ioctl(sock, SIOCINQ, &ioctl_siocinq); if (error < 0) { fprintf(stderr, "Failed to run ioctl(SIOCINQ): %d\n", error); return T_EXIT_FAIL; } create_sqe_and_submit(&ring, sock, SOCKET_URING_OP_SIOCOUTQ); uring_siocoutq = receive_cqe(&ring); create_sqe_and_submit(&ring, sock, SOCKET_URING_OP_SIOCINQ); uring_siocinq = receive_cqe(&ring); /* Compare that both values (ioctl and uring CMD) should be similar */ if (uring_siocoutq != ioctl_siocoutq) { if (!retry) { retry = 1; goto again; } fprintf(stderr, "values does not match: %d != %d\n", uring_siocoutq, ioctl_siocoutq); return T_EXIT_FAIL; } if (uring_siocinq != ioctl_siocinq) { if (!retry) { retry = 1; goto again; } fprintf(stderr, "values does not match: %d != %d\n", uring_siocinq, ioctl_siocinq); return T_EXIT_FAIL; } return T_EXIT_PASS; } int main(int argc, char *argv[]) { int err; if (argc > 1) return 0; /* Test SOCK_STREAM */ err = run_test(true); if (err) return err; if (no_io_cmd) return T_EXIT_SKIP; /* Test SOCK_DGRAM */ err = run_test(false); if (err) return err; /* Test raw sockets */ return run_test_raw(); } liburing-2.6/test/socket-rw-eagain.c000066400000000000000000000062401461424365000174660ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Check that a readv on a nonblocking socket queued before a writev doesn't * wait for data to arrive. 
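 * On kernels without IORING_FEAT_FAST_POLL, the readv is expected to
 * complete immediately with -EAGAIN instead of being parked until the
 * writev's data arrives; the test skips when fast poll is available.
 */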
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { int p_fd[2], ret; int32_t recv_s0; int32_t val = 1; struct sockaddr_in addr; struct iovec iov_r[1], iov_w[1]; if (argc > 1) return 0; srand(getpid()); recv_s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); ret = setsockopt(recv_s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); assert(ret != -1); ret = setsockopt(recv_s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); assert(ret != -1); addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); ret = t_bind_ephemeral_port(recv_s0, &addr); assert(!ret); ret = listen(recv_s0, 128); assert(ret != -1); p_fd[1] = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); val = 1; ret = setsockopt(p_fd[1], IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); assert(ret != -1); int32_t flags = fcntl(p_fd[1], F_GETFL, 0); assert(flags != -1); flags |= O_NONBLOCK; ret = fcntl(p_fd[1], F_SETFL, flags); assert(ret != -1); ret = connect(p_fd[1], (struct sockaddr*)&addr, sizeof(addr)); assert(ret == -1); p_fd[0] = accept(recv_s0, NULL, NULL); assert(p_fd[0] != -1); flags = fcntl(p_fd[0], F_GETFL, 0); assert(flags != -1); flags |= O_NONBLOCK; ret = fcntl(p_fd[0], F_SETFL, flags); assert(ret != -1); while (1) { int32_t code; socklen_t code_len = sizeof(code); ret = getsockopt(p_fd[1], SOL_SOCKET, SO_ERROR, &code, &code_len); assert(ret != -1); if (!code) break; } struct io_uring m_io_uring; struct io_uring_params p = { }; ret = io_uring_queue_init_params(32, &m_io_uring, &p); assert(ret >= 0); if (p.features & IORING_FEAT_FAST_POLL) return 0; char recv_buff[128]; char send_buff[128]; { iov_r[0].iov_base = recv_buff; iov_r[0].iov_len = sizeof(recv_buff); struct io_uring_sqe* sqe = io_uring_get_sqe(&m_io_uring); assert(sqe != NULL); io_uring_prep_readv(sqe, p_fd[0], iov_r, 1, 0); sqe->user_data = 1; } { iov_w[0].iov_base = send_buff; iov_w[0].iov_len = sizeof(send_buff); struct io_uring_sqe* sqe = io_uring_get_sqe(&m_io_uring); assert(sqe != NULL); io_uring_prep_writev(sqe, p_fd[1], iov_w, 1, 0); sqe->user_data = 2; } ret = io_uring_submit_and_wait(&m_io_uring, 2); assert(ret != -1); struct io_uring_cqe* cqe; uint32_t head; uint32_t count = 0; while (count != 2) { io_uring_for_each_cqe(&m_io_uring, head, cqe) { if (cqe->user_data == 2 && cqe->res != 128) { fprintf(stderr, "write=%d\n", cqe->res); goto err; } else if (cqe->user_data == 1 && cqe->res != -EAGAIN) { fprintf(stderr, "read=%d\n", cqe->res); goto err; } count++; } assert(count <= 2); io_uring_cq_advance(&m_io_uring, count); } io_uring_queue_exit(&m_io_uring); return 0; err: io_uring_queue_exit(&m_io_uring); return 1; } liburing-2.6/test/socket-rw-offset.c000066400000000000000000000061211461424365000175260ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Check that a readv on a socket queued before a writev doesn't hang * the processing. 
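 * This variant passes -1 as the readv offset to exercise the
 * IORING_FEAT_RW_CUR_POS ("use current file position") path on a
 * socket; the test skips on kernels without that feature.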
* * From Hrvoje Zeba */ #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { int p_fd[2], ret; int32_t recv_s0; int32_t val = 1; struct sockaddr_in addr; struct iovec iov_r[1], iov_w[1]; if (argc > 1) return 0; srand(getpid()); recv_s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); ret = setsockopt(recv_s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); assert(ret != -1); ret = setsockopt(recv_s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); assert(ret != -1); addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); ret = t_bind_ephemeral_port(recv_s0, &addr); assert(!ret); ret = listen(recv_s0, 128); assert(ret != -1); p_fd[1] = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); val = 1; ret = setsockopt(p_fd[1], IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); assert(ret != -1); int32_t flags = fcntl(p_fd[1], F_GETFL, 0); assert(flags != -1); flags |= O_NONBLOCK; ret = fcntl(p_fd[1], F_SETFL, flags); assert(ret != -1); ret = connect(p_fd[1], (struct sockaddr*)&addr, sizeof(addr)); assert(ret == -1); flags = fcntl(p_fd[1], F_GETFL, 0); assert(flags != -1); flags &= ~O_NONBLOCK; ret = fcntl(p_fd[1], F_SETFL, flags); assert(ret != -1); p_fd[0] = accept(recv_s0, NULL, NULL); assert(p_fd[0] != -1); while (1) { int32_t code; socklen_t code_len = sizeof(code); ret = getsockopt(p_fd[1], SOL_SOCKET, SO_ERROR, &code, &code_len); assert(ret != -1); if (!code) break; } struct io_uring m_io_uring; struct io_uring_params p = { }; ret = io_uring_queue_init_params(32, &m_io_uring, &p); assert(ret >= 0); /* skip for kernels without cur position read/write */ if (!(p.features & IORING_FEAT_RW_CUR_POS)) return 0; char recv_buff[128]; char send_buff[128]; { iov_r[0].iov_base = recv_buff; iov_r[0].iov_len = sizeof(recv_buff); struct io_uring_sqe* sqe = io_uring_get_sqe(&m_io_uring); assert(sqe != NULL); io_uring_prep_readv(sqe, p_fd[0], iov_r, 1, -1); } { iov_w[0].iov_base = send_buff; iov_w[0].iov_len = sizeof(send_buff); struct io_uring_sqe* sqe = io_uring_get_sqe(&m_io_uring); assert(sqe != NULL); io_uring_prep_writev(sqe, p_fd[1], iov_w, 1, 0); } ret = io_uring_submit_and_wait(&m_io_uring, 2); assert(ret != -1); struct io_uring_cqe* cqe; uint32_t head; uint32_t count = 0; ret = 0; while (count != 2) { io_uring_for_each_cqe(&m_io_uring, head, cqe) { if (cqe->res != 128) { fprintf(stderr, "Got %d, expected 128\n", cqe->res); ret = 1; goto err; } assert(cqe->res == 128); count++; } assert(count <= 2); io_uring_cq_advance(&m_io_uring, count); } err: io_uring_queue_exit(&m_io_uring); return ret; } liburing-2.6/test/socket-rw.c000066400000000000000000000054611461424365000162500ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Check that a readv on a socket queued before a writev doesn't hang * the processing. 
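 * Both SQEs are queued before a single submit_and_wait() call; the
 * writev supplies the data that completes the readv, so both CQEs are
 * expected to report the full 128 bytes.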
* * From Hrvoje Zeba */ #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { int p_fd[2], ret; int32_t recv_s0; int32_t val = 1; struct sockaddr_in addr; struct iovec iov_r[1], iov_w[1]; if (argc > 1) return 0; srand(getpid()); recv_s0 = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); ret = setsockopt(recv_s0, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)); assert(ret != -1); ret = setsockopt(recv_s0, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); assert(ret != -1); addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr("127.0.0.1"); ret = t_bind_ephemeral_port(recv_s0, &addr); assert(!ret); ret = listen(recv_s0, 128); assert(ret != -1); p_fd[1] = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); val = 1; ret = setsockopt(p_fd[1], IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); assert(ret != -1); int32_t flags = fcntl(p_fd[1], F_GETFL, 0); assert(flags != -1); flags |= O_NONBLOCK; ret = fcntl(p_fd[1], F_SETFL, flags); assert(ret != -1); ret = connect(p_fd[1], (struct sockaddr*)&addr, sizeof(addr)); assert(ret == -1); flags = fcntl(p_fd[1], F_GETFL, 0); assert(flags != -1); flags &= ~O_NONBLOCK; ret = fcntl(p_fd[1], F_SETFL, flags); assert(ret != -1); p_fd[0] = accept(recv_s0, NULL, NULL); assert(p_fd[0] != -1); while (1) { int32_t code; socklen_t code_len = sizeof(code); ret = getsockopt(p_fd[1], SOL_SOCKET, SO_ERROR, &code, &code_len); assert(ret != -1); if (!code) break; } struct io_uring m_io_uring; ret = io_uring_queue_init(32, &m_io_uring, 0); assert(ret >= 0); char recv_buff[128]; char send_buff[128]; { iov_r[0].iov_base = recv_buff; iov_r[0].iov_len = sizeof(recv_buff); struct io_uring_sqe* sqe = io_uring_get_sqe(&m_io_uring); assert(sqe != NULL); io_uring_prep_readv(sqe, p_fd[0], iov_r, 1, 0); } { iov_w[0].iov_base = send_buff; iov_w[0].iov_len = sizeof(send_buff); struct io_uring_sqe* sqe = io_uring_get_sqe(&m_io_uring); assert(sqe != NULL); io_uring_prep_writev(sqe, p_fd[1], iov_w, 1, 0); } ret = io_uring_submit_and_wait(&m_io_uring, 2); assert(ret != -1); struct io_uring_cqe* cqe; uint32_t head; uint32_t count = 0; while (count != 2) { io_uring_for_each_cqe(&m_io_uring, head, cqe) { assert(cqe->res == 128); count++; } assert(count <= 2); io_uring_cq_advance(&m_io_uring, count); } io_uring_queue_exit(&m_io_uring); return 0; } liburing-2.6/test/socket.c000066400000000000000000000201241461424365000156130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Simple test case using the socket op */ #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static char str[] = "This is a test of send and recv over io_uring!"; #define MAX_MSG 128 #define HOST "127.0.0.1" static int no_socket; static __be32 g_port; static int recv_prep(struct io_uring *ring, struct iovec *iov, int *sock, int registerfiles) { struct sockaddr_in saddr; struct io_uring_sqe *sqe; int sockfd, ret, val, use_fd; memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = htonl(INADDR_ANY); sockfd = socket(AF_INET, SOCK_DGRAM, 0); if (sockfd < 0) { perror("socket"); return 1; } val = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); if (t_bind_ephemeral_port(sockfd, &saddr)) { perror("bind"); goto err; } g_port = saddr.sin_port; if (registerfiles) { ret = io_uring_register_files(ring, &sockfd, 1); if (ret) { fprintf(stderr, "file reg 
failed\n"); goto err; } use_fd = 0; } else { use_fd = sockfd; } sqe = io_uring_get_sqe(ring); io_uring_prep_recv(sqe, use_fd, iov->iov_base, iov->iov_len, 0); if (registerfiles) sqe->flags |= IOSQE_FIXED_FILE; sqe->user_data = 2; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } *sock = sockfd; return 0; err: close(sockfd); return 1; } static int do_recv(struct io_uring *ring, struct iovec *iov) { struct io_uring_cqe *cqe; int ret; ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stdout, "wait_cqe: %d\n", ret); goto err; } if (cqe->res == -EINVAL) { fprintf(stdout, "recv not supported, skipping\n"); return 0; } if (cqe->res < 0) { fprintf(stderr, "failed cqe: %d\n", cqe->res); goto err; } if (cqe->res -1 != strlen(str)) { fprintf(stderr, "got wrong length: %d/%d\n", cqe->res, (int) strlen(str) + 1); goto err; } if (strcmp(str, iov->iov_base)) { fprintf(stderr, "string mismatch\n"); goto err; } return 0; err: return 1; } struct recv_data { pthread_mutex_t mutex; int use_sqthread; int registerfiles; }; static void *recv_fn(void *data) { struct recv_data *rd = data; char buf[MAX_MSG + 1]; struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) - 1, }; struct io_uring_params p = { }; struct io_uring ring; int ret, sock; if (rd->use_sqthread) p.flags = IORING_SETUP_SQPOLL; ret = t_create_ring_params(1, &ring, &p); if (ret == T_SETUP_SKIP) { pthread_mutex_unlock(&rd->mutex); ret = 0; goto err; } else if (ret < 0) { pthread_mutex_unlock(&rd->mutex); goto err; } if (rd->use_sqthread && !rd->registerfiles) { if (!(p.features & IORING_FEAT_SQPOLL_NONFIXED)) { fprintf(stdout, "Non-registered SQPOLL not available, skipping\n"); pthread_mutex_unlock(&rd->mutex); goto err; } } ret = recv_prep(&ring, &iov, &sock, rd->registerfiles); if (ret) { fprintf(stderr, "recv_prep failed: %d\n", ret); goto err; } pthread_mutex_unlock(&rd->mutex); ret = do_recv(&ring, &iov); close(sock); io_uring_queue_exit(&ring); err: return (void *)(intptr_t)ret; } static int fallback_send(struct io_uring *ring, struct sockaddr_in *saddr) { struct iovec iov = { .iov_base = str, .iov_len = sizeof(str), }; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int sockfd, ret; sockfd = socket(AF_INET, SOCK_DGRAM, 0); if (sockfd < 0) { perror("socket"); return 1; } ret = connect(sockfd, (struct sockaddr *)saddr, sizeof(*saddr)); if (ret < 0) { perror("connect"); return 1; } sqe = io_uring_get_sqe(ring); io_uring_prep_send(sqe, sockfd, iov.iov_base, iov.iov_len, 0); sqe->user_data = 1; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (cqe->res == -EINVAL) { fprintf(stdout, "send not supported, skipping\n"); close(sockfd); return 0; } if (cqe->res != iov.iov_len) { fprintf(stderr, "failed cqe: %d\n", cqe->res); goto err; } close(sockfd); return 0; err: close(sockfd); return 1; } static int do_send(int socket_direct, int alloc) { struct sockaddr_in saddr; struct iovec iov = { .iov_base = str, .iov_len = sizeof(str), }; struct io_uring ring; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int sockfd, ret, fd = -1; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return 1; } if (socket_direct) { ret = io_uring_register_files(&ring, &fd, 1); if (ret) { fprintf(stderr, "file register %d\n", ret); return 1; } } assert(g_port != 0); memset(&saddr, 0, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_port = g_port; inet_pton(AF_INET, 
HOST, &saddr.sin_addr); sqe = io_uring_get_sqe(&ring); if (socket_direct) { unsigned file_index = 0; if (alloc) file_index = IORING_FILE_INDEX_ALLOC - 1; io_uring_prep_socket_direct(sqe, AF_INET, SOCK_DGRAM, 0, file_index, 0); } else { io_uring_prep_socket(sqe, AF_INET, SOCK_DGRAM, 0, 0); } ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "socket submit: %d\n", ret); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe: %d\n", ret); return 1; } if (cqe->res < 0) { if (cqe->res == -EINVAL) { no_socket = 1; io_uring_cqe_seen(&ring, cqe); return fallback_send(&ring, &saddr); } fprintf(stderr, "socket res: %d\n", ret); return 1; } sockfd = cqe->res; if (socket_direct && !alloc) sockfd = 0; io_uring_cqe_seen(&ring, cqe); sqe = io_uring_get_sqe(&ring); io_uring_prep_connect(sqe, sockfd, (struct sockaddr *) &saddr, sizeof(saddr)); if (socket_direct) sqe->flags |= IOSQE_FIXED_FILE; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "connect submit: %d\n", ret); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe: %d\n", ret); return 1; } if (cqe->res < 0) { fprintf(stderr, "connect res: %d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); sqe = io_uring_get_sqe(&ring); io_uring_prep_send(sqe, sockfd, iov.iov_base, iov.iov_len, 0); sqe->user_data = 1; if (socket_direct) sqe->flags |= IOSQE_FIXED_FILE; ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(&ring, &cqe); if (cqe->res == -EINVAL) { fprintf(stdout, "send not supported, skipping\n"); close(sockfd); return 0; } if (cqe->res != iov.iov_len) { fprintf(stderr, "failed cqe: %d\n", cqe->res); goto err; } close(sockfd); return 0; err: close(sockfd); return 1; } static int test(int use_sqthread, int regfiles, int socket_direct, int alloc) { pthread_mutexattr_t attr; pthread_t recv_thread; struct recv_data rd; int ret; void *retval; pthread_mutexattr_init(&attr); pthread_mutexattr_setpshared(&attr, 1); pthread_mutex_init(&rd.mutex, &attr); pthread_mutex_lock(&rd.mutex); rd.use_sqthread = use_sqthread; rd.registerfiles = regfiles; ret = pthread_create(&recv_thread, NULL, recv_fn, &rd); if (ret) { fprintf(stderr, "Thread create failed: %d\n", ret); pthread_mutex_unlock(&rd.mutex); return 1; } pthread_mutex_lock(&rd.mutex); do_send(socket_direct, alloc); pthread_join(recv_thread, &retval); return (intptr_t)retval; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test(0, 0, 0, 0); if (ret) { fprintf(stderr, "test sqthread=0 failed\n"); return ret; } if (no_socket) return 0; ret = test(1, 1, 0, 0); if (ret) { fprintf(stderr, "test sqthread=1 reg=1 failed\n"); return ret; } ret = test(1, 0, 0, 0); if (ret) { fprintf(stderr, "test sqthread=1 reg=0 failed\n"); return ret; } ret = test(0, 0, 1, 0); if (ret) { fprintf(stderr, "test sqthread=0 direct=1 failed\n"); return ret; } ret = test(0, 0, 1, 1); if (ret) { fprintf(stderr, "test sqthread=0 direct=alloc failed\n"); return ret; } return 0; } liburing-2.6/test/splice.c000066400000000000000000000241461461424365000156120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define BUF_SIZE (16 * 4096) struct test_ctx { int real_pipe1[2]; int real_pipe2[2]; int real_fd_in; int real_fd_out; /* fds or for registered files */ int pipe1[2]; int pipe2[2]; int fd_in; int fd_out; void *buf_in; void 
*buf_out; }; static unsigned int splice_flags = 0; static unsigned int sqe_flags = 0; static int has_splice = 0; static int has_tee = 0; static int read_buf(int fd, void *buf, int len) { int ret; while (len) { ret = read(fd, buf, len); if (ret < 0) return ret; len -= ret; buf += ret; } return 0; } static int write_buf(int fd, const void *buf, int len) { int ret; while (len) { ret = write(fd, buf, len); if (ret < 0) return ret; len -= ret; buf += ret; } return 0; } static int check_content(int fd, void *buf, int len, const void *src) { int ret; ret = read_buf(fd, buf, len); if (ret) return ret; ret = memcmp(buf, src, len); return (ret != 0) ? -1 : 0; } static int create_file(const char *filename) { int fd, save_errno; fd = open(filename, O_RDWR | O_CREAT, 0644); save_errno = errno; unlink(filename); errno = save_errno; return fd; } static int init_splice_ctx(struct test_ctx *ctx) { int ret, rnd_fd; ctx->buf_in = t_calloc(BUF_SIZE, 1); ctx->buf_out = t_calloc(BUF_SIZE, 1); ctx->fd_in = create_file(".splice-test-in"); if (ctx->fd_in < 0) { perror("file open"); return 1; } ctx->fd_out = create_file(".splice-test-out"); if (ctx->fd_out < 0) { perror("file open"); return 1; } /* get random data */ rnd_fd = open("/dev/urandom", O_RDONLY); if (rnd_fd < 0) return 1; ret = read_buf(rnd_fd, ctx->buf_in, BUF_SIZE); if (ret != 0) return 1; close(rnd_fd); /* populate file */ ret = write_buf(ctx->fd_in, ctx->buf_in, BUF_SIZE); if (ret) return ret; if (pipe(ctx->pipe1) < 0) return 1; if (pipe(ctx->pipe2) < 0) return 1; ctx->real_pipe1[0] = ctx->pipe1[0]; ctx->real_pipe1[1] = ctx->pipe1[1]; ctx->real_pipe2[0] = ctx->pipe2[0]; ctx->real_pipe2[1] = ctx->pipe2[1]; ctx->real_fd_in = ctx->fd_in; ctx->real_fd_out = ctx->fd_out; return 0; } static int do_splice_op(struct io_uring *ring, int fd_in, loff_t off_in, int fd_out, loff_t off_out, unsigned int len, __u8 opcode) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret = -1; do { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return -1; } io_uring_prep_splice(sqe, fd_in, off_in, fd_out, off_out, len, splice_flags); sqe->flags |= sqe_flags; sqe->user_data = 42; sqe->opcode = opcode; ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "sqe submit failed: %d\n", ret); return ret; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", cqe->res); return ret; } if (cqe->res <= 0) { io_uring_cqe_seen(ring, cqe); return cqe->res; } len -= cqe->res; if (off_in != -1) off_in += cqe->res; if (off_out != -1) off_out += cqe->res; io_uring_cqe_seen(ring, cqe); } while (len); return 0; } static int do_splice(struct io_uring *ring, int fd_in, loff_t off_in, int fd_out, loff_t off_out, unsigned int len) { return do_splice_op(ring, fd_in, off_in, fd_out, off_out, len, IORING_OP_SPLICE); } static int do_tee(struct io_uring *ring, int fd_in, int fd_out, unsigned int len) { return do_splice_op(ring, fd_in, 0, fd_out, 0, len, IORING_OP_TEE); } static void check_splice_support(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = do_splice(ring, -1, 0, -1, 0, BUF_SIZE); has_splice = (ret == -EBADF); } static void check_tee_support(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = do_tee(ring, -1, -1, BUF_SIZE); has_tee = (ret == -EBADF); } static int check_zero_splice(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = do_splice(ring, ctx->fd_in, -1, ctx->pipe1[1], -1, 0); if (ret) return ret; ret = do_splice(ring, ctx->pipe2[0], -1, ctx->pipe1[1], -1, 0); if (ret) 
return ret; return 0; } static int splice_to_pipe(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = lseek(ctx->real_fd_in, 0, SEEK_SET); if (ret) return ret; /* implicit file offset */ ret = do_splice(ring, ctx->fd_in, -1, ctx->pipe1[1], -1, BUF_SIZE); if (ret) return ret; ret = check_content(ctx->real_pipe1[0], ctx->buf_out, BUF_SIZE, ctx->buf_in); if (ret) return ret; /* explicit file offset */ ret = do_splice(ring, ctx->fd_in, 0, ctx->pipe1[1], -1, BUF_SIZE); if (ret) return ret; return check_content(ctx->real_pipe1[0], ctx->buf_out, BUF_SIZE, ctx->buf_in); } static int splice_from_pipe(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = write_buf(ctx->real_pipe1[1], ctx->buf_in, BUF_SIZE); if (ret) return ret; ret = do_splice(ring, ctx->pipe1[0], -1, ctx->fd_out, 0, BUF_SIZE); if (ret) return ret; ret = check_content(ctx->real_fd_out, ctx->buf_out, BUF_SIZE, ctx->buf_in); if (ret) return ret; ret = ftruncate(ctx->real_fd_out, 0); if (ret) return ret; return lseek(ctx->real_fd_out, 0, SEEK_SET); } static int splice_pipe_to_pipe(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = do_splice(ring, ctx->fd_in, 0, ctx->pipe1[1], -1, BUF_SIZE); if (ret) return ret; ret = do_splice(ring, ctx->pipe1[0], -1, ctx->pipe2[1], -1, BUF_SIZE); if (ret) return ret; return check_content(ctx->real_pipe2[0], ctx->buf_out, BUF_SIZE, ctx->buf_in); } static int fail_splice_pipe_offset(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = do_splice(ring, ctx->fd_in, 0, ctx->pipe1[1], 0, BUF_SIZE); if (ret != -ESPIPE && ret != -EINVAL) return ret; ret = do_splice(ring, ctx->pipe1[0], 0, ctx->fd_out, 0, BUF_SIZE); if (ret != -ESPIPE && ret != -EINVAL) return ret; return 0; } static int fail_tee_nonpipe(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = do_tee(ring, ctx->fd_in, ctx->pipe1[1], BUF_SIZE); if (ret != -ESPIPE && ret != -EINVAL) return ret; return 0; } static int fail_tee_offset(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = do_splice_op(ring, ctx->pipe2[0], -1, ctx->pipe1[1], 0, BUF_SIZE, IORING_OP_TEE); if (ret != -ESPIPE && ret != -EINVAL) return ret; ret = do_splice_op(ring, ctx->pipe2[0], 0, ctx->pipe1[1], -1, BUF_SIZE, IORING_OP_TEE); if (ret != -ESPIPE && ret != -EINVAL) return ret; return 0; } static int check_tee(struct io_uring *ring, struct test_ctx *ctx) { int ret; ret = write_buf(ctx->real_pipe1[1], ctx->buf_in, BUF_SIZE); if (ret) return ret; ret = do_tee(ring, ctx->pipe1[0], ctx->pipe2[1], BUF_SIZE); if (ret) return ret; ret = check_content(ctx->real_pipe1[0], ctx->buf_out, BUF_SIZE, ctx->buf_in); if (ret) { fprintf(stderr, "tee(), invalid src data\n"); return ret; } ret = check_content(ctx->real_pipe2[0], ctx->buf_out, BUF_SIZE, ctx->buf_in); if (ret) { fprintf(stderr, "tee(), invalid dst data\n"); return ret; } return 0; } static int check_zero_tee(struct io_uring *ring, struct test_ctx *ctx) { return do_tee(ring, ctx->pipe2[0], ctx->pipe1[1], 0); } static int test_splice(struct io_uring *ring, struct test_ctx *ctx) { int ret; if (has_splice) { ret = check_zero_splice(ring, ctx); if (ret) { fprintf(stderr, "check_zero_splice failed %i %i\n", ret, errno); return ret; } ret = splice_to_pipe(ring, ctx); if (ret) { fprintf(stderr, "splice_to_pipe failed %i %i\n", ret, errno); return ret; } ret = splice_from_pipe(ring, ctx); if (ret) { fprintf(stderr, "splice_from_pipe failed %i %i\n", ret, errno); return ret; } ret = splice_pipe_to_pipe(ring, ctx); if (ret) { fprintf(stderr, "splice_pipe_to_pipe failed %i %i\n", ret, 
errno); return ret; } ret = fail_splice_pipe_offset(ring, ctx); if (ret) { fprintf(stderr, "fail_splice_pipe_offset failed %i %i\n", ret, errno); return ret; } } if (has_tee) { ret = check_zero_tee(ring, ctx); if (ret) { fprintf(stderr, "check_zero_tee() failed %i %i\n", ret, errno); return ret; } ret = fail_tee_nonpipe(ring, ctx); if (ret) { fprintf(stderr, "fail_tee_nonpipe() failed %i %i\n", ret, errno); return ret; } ret = fail_tee_offset(ring, ctx); if (ret) { fprintf(stderr, "fail_tee_offset failed %i %i\n", ret, errno); return ret; } ret = check_tee(ring, ctx); if (ret) { fprintf(stderr, "check_tee() failed %i %i\n", ret, errno); return ret; } } return 0; } int main(int argc, char *argv[]) { struct io_uring ring; struct io_uring_params p = { }; struct test_ctx ctx; int ret; int reg_fds[6]; if (argc > 1) return 0; ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } if (!(p.features & IORING_FEAT_FAST_POLL)) { fprintf(stdout, "No splice support, skipping\n"); return 0; } ret = init_splice_ctx(&ctx); if (ret) { fprintf(stderr, "init failed %i %i\n", ret, errno); return 1; } check_splice_support(&ring, &ctx); if (!has_splice) fprintf(stdout, "skip, doesn't support splice()\n"); check_tee_support(&ring, &ctx); if (!has_tee) fprintf(stdout, "skip, doesn't support tee()\n"); ret = test_splice(&ring, &ctx); if (ret) { fprintf(stderr, "basic splice tests failed\n"); return ret; } reg_fds[0] = ctx.real_pipe1[0]; reg_fds[1] = ctx.real_pipe1[1]; reg_fds[2] = ctx.real_pipe2[0]; reg_fds[3] = ctx.real_pipe2[1]; reg_fds[4] = ctx.real_fd_in; reg_fds[5] = ctx.real_fd_out; ret = io_uring_register_files(&ring, reg_fds, 6); if (ret) { fprintf(stderr, "%s: register ret=%d\n", __FUNCTION__, ret); return 1; } /* remap fds to registered */ ctx.pipe1[0] = 0; ctx.pipe1[1] = 1; ctx.pipe2[0] = 2; ctx.pipe2[1] = 3; ctx.fd_in = 4; ctx.fd_out = 5; splice_flags = SPLICE_F_FD_IN_FIXED; sqe_flags = IOSQE_FIXED_FILE; ret = test_splice(&ring, &ctx); if (ret) { fprintf(stderr, "registered fds splice tests failed\n"); return ret; } return 0; } liburing-2.6/test/sq-full-cpp.cc000066400000000000000000000013071461424365000166330ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test SQ queue full condition * */ #include #include #include #include #include #include #include "liburing.h" int main(int argc, char *argv[]) { struct io_uring_sqe *sqe; struct io_uring ring; int ret, i; if (argc > 1) return 0; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } i = 0; while ((sqe = io_uring_get_sqe(&ring)) != NULL) i++; if (i != 8) { fprintf(stderr, "Got %d SQEs, wanted 8\n", i); goto err; } io_uring_queue_exit(&ring); return 0; err: io_uring_queue_exit(&ring); return 1; } liburing-2.6/test/sq-full.c000066400000000000000000000013071461424365000157100ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test SQ queue full condition * */ #include #include #include #include #include #include #include "liburing.h" int main(int argc, char *argv[]) { struct io_uring_sqe *sqe; struct io_uring ring; int ret, i; if (argc > 1) return 0; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } i = 0; while ((sqe = io_uring_get_sqe(&ring)) != NULL) i++; if (i != 8) { fprintf(stderr, "Got %d SQEs, wanted 8\n", i); goto err; } io_uring_queue_exit(&ring); return 0; err: io_uring_queue_exit(&ring); return 1; } 
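/*
 * Editor's sketch (not upstream code): a minimal example of how callers
 * commonly handle the SQ-full condition the tests above demonstrate.
 * When io_uring_get_sqe() returns NULL, flushing the already-queued
 * SQEs with io_uring_submit() frees ring slots so the next get_sqe()
 * can succeed. Only public liburing calls are used; the helper name is
 * illustrative.
 */
static inline struct io_uring_sqe *get_sqe_or_flush(struct io_uring *ring)
{
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		/* SQ ring full: push pending entries to the kernel, retry */
		io_uring_submit(ring);
		sqe = io_uring_get_sqe(ring);
	}
	return sqe;
}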
liburing-2.6/test/sq-poll-dup.c000066400000000000000000000071671461424365000165140ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test SQPOLL with IORING_SETUP_ATTACH_WQ and closing of * the original ring descriptor. */ #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE (128 * 1024 * 1024) #define BS 4096 #define BUFFERS 64 #define NR_RINGS 4 static struct iovec *vecs; static struct io_uring rings[NR_RINGS]; static int wait_io(struct io_uring *ring, int nr_ios) { struct io_uring_cqe *cqe; int ret; while (nr_ios) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "wait_ret=%d\n", ret); return 1; } if (cqe->res != BS) { fprintf(stderr, "Unexpected ret %d\n", cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); nr_ios--; } return 0; } static int queue_io(struct io_uring *ring, int fd, int nr_ios) { unsigned long off; int i; i = 0; off = 0; while (nr_ios) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); if (!sqe) break; io_uring_prep_read(sqe, fd, vecs[i].iov_base, vecs[i].iov_len, off); nr_ios--; i++; off += BS; } io_uring_submit(ring); return i; } static int do_io(int fd, int ring_start, int ring_end) { int i, rets[NR_RINGS]; unsigned ios = 0; while (ios < 32) { for (i = ring_start; i < ring_end; i++) { int ret = queue_io(&rings[i], fd, BUFFERS); if (ret < 0) goto err; rets[i] = ret; } for (i = ring_start; i < ring_end; i++) { if (wait_io(&rings[i], rets[i])) goto err; } ios += BUFFERS; } return 0; err: return 1; } static int test(int fd, int do_dup_and_close, int close_ring) { int i, ret, ring_fd; for (i = 0; i < NR_RINGS; i++) { struct io_uring_params p = { }; p.flags = IORING_SETUP_SQPOLL; p.sq_thread_idle = 100; if (i) { p.wq_fd = rings[0].ring_fd; p.flags |= IORING_SETUP_ATTACH_WQ; } ret = io_uring_queue_init_params(BUFFERS, &rings[i], &p); if (ret) { fprintf(stderr, "queue_init: %d/%d\n", ret, i); goto err; } /* no sharing for non-fixed either */ if (!(p.features & IORING_FEAT_SQPOLL_NONFIXED)) { fprintf(stdout, "No SQPOLL sharing, skipping\n"); return 0; } } /* test all rings */ if (do_io(fd, 0, NR_RINGS)) goto err; /* dup and close original ring fd */ ring_fd = dup(rings[0].ring_fd); if (close_ring) close(rings[0].ring_fd); rings[0].ring_fd = rings[0].enter_ring_fd = ring_fd; if (do_dup_and_close) goto done; /* test all but closed one */ if (do_io(fd, 1, NR_RINGS)) goto err; /* test closed one */ if (do_io(fd, 0, 1)) goto err; /* make sure thread is idle so we enter the kernel */ usleep(200000); /* test closed one */ if (do_io(fd, 0, 1)) goto err; done: for (i = 0; i < NR_RINGS; i++) io_uring_queue_exit(&rings[i]); return 0; err: return 1; } int main(int argc, char *argv[]) { char *fname; int ret, fd; if (argc > 1) { fname = argv[1]; } else { fname = ".basic-rw-poll-dup"; t_create_file(fname, FILE_SIZE); } vecs = t_create_buffers(BUFFERS, BS); fd = open(fname, O_RDONLY | O_DIRECT); if (fd < 0) { int __e = errno; if (fname != argv[1]) unlink(fname); if (__e == EINVAL) return T_EXIT_SKIP; perror("open"); return -1; } if (fname != argv[1]) unlink(fname); ret = test(fd, 0, 0); if (ret) { fprintf(stderr, "test 0 0 failed\n"); goto err; } ret = test(fd, 0, 1); if (ret) { fprintf(stderr, "test 0 1 failed\n"); goto err; } ret = test(fd, 1, 0); if (ret) { fprintf(stderr, "test 1 0 failed\n"); goto err; } return 0; err: return 1; } liburing-2.6/test/sq-poll-kthread.c000066400000000000000000000064361461424365000173440ustar00rootroot00000000000000/* 
SPDX-License-Identifier: MIT */ /* * Description: test if io_uring SQ poll kthread is stopped when the userspace * process ended with or without closing the io_uring fd * */ #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define SQ_THREAD_IDLE 2000 #define BUF_SIZE 128 #define KTHREAD_NAME "io_uring-sq" enum { TEST_OK = 0, TEST_SKIPPED = 1, TEST_FAILED = 2, }; static int do_test_sq_poll_kthread_stopped(bool do_exit) { int ret = 0, pipe1[2]; struct io_uring_params param; struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; uint8_t buf[BUF_SIZE]; struct iovec iov; if (pipe(pipe1) != 0) { perror("pipe"); return TEST_FAILED; } memset(¶m, 0, sizeof(param)); param.flags |= IORING_SETUP_SQPOLL; param.sq_thread_idle = SQ_THREAD_IDLE; ret = t_create_ring_params(16, &ring, ¶m); if (ret == T_SETUP_SKIP) { ret = TEST_FAILED; goto err_pipe; } else if (ret != T_SETUP_OK) { fprintf(stderr, "ring setup failed\n"); ret = TEST_FAILED; goto err_pipe; } ret = io_uring_register_files(&ring, &pipe1[1], 1); if (ret) { fprintf(stderr, "file reg failed: %d\n", ret); ret = TEST_FAILED; goto err_uring; } iov.iov_base = buf; iov.iov_len = BUF_SIZE; sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "io_uring_get_sqe failed\n"); ret = TEST_FAILED; goto err_uring; } io_uring_prep_writev(sqe, 0, &iov, 1, 0); sqe->flags |= IOSQE_FIXED_FILE; ret = io_uring_submit(&ring); if (ret < 0) { fprintf(stderr, "io_uring_submit failed - ret: %d\n", ret); ret = TEST_FAILED; goto err_uring; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "io_uring_wait_cqe - ret: %d\n", ret); ret = TEST_FAILED; goto err_uring; } if (cqe->res != BUF_SIZE) { fprintf(stderr, "unexpected cqe->res %d [expected %d]\n", cqe->res, BUF_SIZE); ret = TEST_FAILED; goto err_uring; } io_uring_cqe_seen(&ring, cqe); ret = TEST_OK; err_uring: if (do_exit) io_uring_queue_exit(&ring); err_pipe: close(pipe1[0]); close(pipe1[1]); return ret; } static int test_sq_poll_kthread_stopped(bool do_exit) { pid_t pid; int status = 0; pid = fork(); if (pid == 0) { int ret = do_test_sq_poll_kthread_stopped(do_exit); exit(ret); } pid = wait(&status); if (status != 0) return WEXITSTATUS(status); sleep(1); if (system("ps --ppid 2 | grep " KTHREAD_NAME) == 0) { fprintf(stderr, "%s kthread still running!\n", KTHREAD_NAME); return TEST_FAILED; } return 0; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test_sq_poll_kthread_stopped(true); if (ret == TEST_SKIPPED) { printf("test_sq_poll_kthread_stopped_exit: skipped\n"); } else if (ret == TEST_FAILED) { fprintf(stderr, "test_sq_poll_kthread_stopped_exit failed\n"); return ret; } ret = test_sq_poll_kthread_stopped(false); if (ret == TEST_SKIPPED) { printf("test_sq_poll_kthread_stopped_noexit: skipped\n"); } else if (ret == TEST_FAILED) { fprintf(stderr, "test_sq_poll_kthread_stopped_noexit failed\n"); return ret; } return 0; } liburing-2.6/test/sq-poll-share.c000066400000000000000000000047141461424365000170210ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test SQPOLL with IORING_SETUP_ATTACH_WQ */ #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define FILE_SIZE (128 * 1024 * 1024) #define BS 4096 #define BUFFERS 64 #define NR_RINGS 4 static struct iovec *vecs; static int wait_io(struct io_uring *ring, int nr_ios) { struct io_uring_cqe *cqe; 
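	/*
	 * Note: with SQPOLL the wait below can transiently fail with
	 * -EAGAIN (e.g. while the poll thread is being woken), which is
	 * why that case is retried rather than treated as an error.
	 */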
while (nr_ios) { int ret = io_uring_wait_cqe(ring, &cqe); if (ret == -EAGAIN) { continue; } else if (ret) { fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret); return 1; } if (cqe->res != BS) { fprintf(stderr, "Unexpected ret %d\n", cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); nr_ios--; } return 0; } static int queue_io(struct io_uring *ring, int fd, int nr_ios) { unsigned long off; int i; i = 0; off = 0; while (nr_ios) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(ring); if (!sqe) break; io_uring_prep_read(sqe, fd, vecs[i].iov_base, vecs[i].iov_len, off); nr_ios--; i++; off += BS; } io_uring_submit(ring); return i; } int main(int argc, char *argv[]) { struct io_uring rings[NR_RINGS]; int rets[NR_RINGS]; unsigned long ios; int i, ret, fd; char *fname; if (argc > 1) { fname = argv[1]; } else { fname = ".basic-rw-poll-share"; t_create_file(fname, FILE_SIZE); } vecs = t_create_buffers(BUFFERS, BS); fd = open(fname, O_RDONLY | O_DIRECT); if (fd < 0) { perror("open"); return -1; } if (fname != argv[1]) unlink(fname); for (i = 0; i < NR_RINGS; i++) { struct io_uring_params p = { }; p.flags = IORING_SETUP_SQPOLL; if (i) { p.wq_fd = rings[0].ring_fd; p.flags |= IORING_SETUP_ATTACH_WQ; } ret = io_uring_queue_init_params(BUFFERS, &rings[i], &p); if (ret) { fprintf(stderr, "queue_init: %d/%d\n", ret, i); goto err; } /* no sharing for non-fixed either */ if (!(p.features & IORING_FEAT_SQPOLL_NONFIXED)) { fprintf(stdout, "No SQPOLL sharing, skipping\n"); return 0; } } ios = 0; while (ios < (FILE_SIZE / BS)) { for (i = 0; i < NR_RINGS; i++) { ret = queue_io(&rings[i], fd, BUFFERS); if (ret < 0) goto err; rets[i] = ret; } for (i = 0; i < NR_RINGS; i++) { if (wait_io(&rings[i], rets[i])) goto err; } ios += BUFFERS; } return 0; err: return 1; } liburing-2.6/test/sq-space_left.c000066400000000000000000000054451461424365000170620ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test SQ queue space left * */ #include #include #include #include #include #include #include "liburing.h" static int test_left(void) { struct io_uring_sqe *sqe; struct io_uring ring; int ret, i = 0, s; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } if ((s = io_uring_sq_space_left(&ring)) != 8) { fprintf(stderr, "Got %d SQEs left, expected %d\n", s, 8); goto err; } i = 0; while ((sqe = io_uring_get_sqe(&ring)) != NULL) { i++; if ((s = io_uring_sq_space_left(&ring)) != 8 - i) { fprintf(stderr, "Got %d SQEs left, expected %d\n", s, 8 - i); goto err; } } if (i != 8) { fprintf(stderr, "Got %d SQEs, expected %d\n", i, 8); goto err; } io_uring_queue_exit(&ring); return 0; err: io_uring_queue_exit(&ring); return 1; } static int test_sync(void) { struct io_uring_sqe *sqe; struct io_uring ring; int ret, i; ret = io_uring_queue_init(32, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } /* prep 8 NOPS */ for (i = 0; i < 8; i++) { sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); } /* prep known bad command, this should terminate submission */ sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); sqe->opcode = 0xfe; /* prep 8 NOPS */ for (i = 0; i < 8; i++) { sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_nop(sqe); } /* we should have 8 + 1 + 8 pending now */ ret = io_uring_sq_ready(&ring); if (ret != 17) { 
fprintf(stderr, "%d ready, wanted 17\n", ret); goto err; } ret = io_uring_submit(&ring); /* should submit 8 successfully, then error #9 and stop */ if (ret != 9) { fprintf(stderr, "submitted %d, wanted 9\n", ret); goto err; } /* should now have 8 ready, with 9 gone */ ret = io_uring_sq_ready(&ring); if (ret != 8) { fprintf(stderr, "%d ready, wanted 8\n", ret); goto err; } ret = io_uring_submit(&ring); /* the last 8 should submit fine */ if (ret != 8) { fprintf(stderr, "submitted %d, wanted 8\n", ret); goto err; } ret = io_uring_sq_ready(&ring); if (ret) { fprintf(stderr, "%d ready, wanted 0\n", ret); goto err; } io_uring_queue_exit(&ring); return 0; err: io_uring_queue_exit(&ring); return 1; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test_left(); if (ret) { fprintf(stderr, "test_left failed\n"); return ret; } ret = test_sync(); if (ret) { fprintf(stderr, "test_sync failed\n"); return ret; } return 0; } liburing-2.6/test/sqpoll-disable-exit.c000066400000000000000000000120331461424365000202050ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ // https://syzkaller.appspot.com/bug?id=99f4ea77bb9b9ef24cefb66469be319f4aa9f162 // autogenerated by syzkaller (https://github.com/google/syzkaller) #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liburing.h" #include "../src/syscall.h" static void sleep_ms(uint64_t ms) { usleep(ms * 1000); } static uint64_t current_time_ms(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts)) exit(1); return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; } static bool write_file(const char* file, const char* what, ...) { char buf[1024]; va_list args; va_start(args, what); vsnprintf(buf, sizeof(buf), what, args); va_end(args); buf[sizeof(buf) - 1] = 0; int len = strlen(buf); int fd = open(file, O_WRONLY | O_CLOEXEC); if (fd == -1) return false; if (write(fd, buf, len) != len) { int err = errno; close(fd); errno = err; return false; } close(fd); return true; } #define SIZEOF_IO_URING_SQE 64 #define SIZEOF_IO_URING_CQE 16 #define SQ_HEAD_OFFSET 0 #define SQ_TAIL_OFFSET 64 #define SQ_RING_MASK_OFFSET 256 #define SQ_RING_ENTRIES_OFFSET 264 #define SQ_FLAGS_OFFSET 276 #define SQ_DROPPED_OFFSET 272 #define CQ_HEAD_OFFSET 128 #define CQ_TAIL_OFFSET 192 #define CQ_RING_MASK_OFFSET 260 #define CQ_RING_ENTRIES_OFFSET 268 #define CQ_RING_OVERFLOW_OFFSET 284 #define CQ_FLAGS_OFFSET 280 #define CQ_CQES_OFFSET 320 static long syz_io_uring_setup(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5) { uint32_t entries = (uint32_t)a0; struct io_uring_params* setup_params = (struct io_uring_params*)a1; void* vma1 = (void*)a2; void* vma2 = (void*)a3; void** ring_ptr_out = (void**)a4; void** sqes_ptr_out = (void**)a5; uint32_t fd_io_uring = __sys_io_uring_setup(entries, setup_params); uint32_t sq_ring_sz = setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t); uint32_t cq_ring_sz = setup_params->cq_off.cqes + setup_params->cq_entries * SIZEOF_IO_URING_CQE; uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? 
sq_ring_sz : cq_ring_sz; *ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQ_RING); uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE; *sqes_ptr_out = mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES); return fd_io_uring; } static void kill_and_wait(int pid, int* status) { kill(-pid, SIGKILL); kill(pid, SIGKILL); for (int i = 0; i < 100; i++) { if (waitpid(-1, status, WNOHANG | __WALL) == pid) return; usleep(1000); } DIR* dir = opendir("/sys/fs/fuse/connections"); if (dir) { for (;;) { struct dirent* ent = readdir(dir); if (!ent) break; if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; char abort[300]; snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name); int fd = open(abort, O_WRONLY); if (fd == -1) { continue; } if (write(fd, abort, 1) < 0) { } close(fd); } closedir(dir); } else { } while (waitpid(-1, status, __WALL) != pid) { } } static void setup_test(void) { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); setpgrp(); write_file("/proc/self/oom_score_adj", "1000"); } static void execute_one(void); #define WAIT_FLAGS __WALL static void loop(void) { int iter = 0; for (; iter < 100; iter++) { int pid = fork(); if (pid < 0) exit(1); if (pid == 0) { setup_test(); execute_one(); exit(0); } int status = 0; uint64_t start = current_time_ms(); for (;;) { if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid) break; sleep_ms(1); if (current_time_ms() - start < 5000) { continue; } kill_and_wait(pid, &status); break; } } } void execute_one(void) { *(uint32_t*)0x20000044 = 0; *(uint32_t*)0x20000048 = 0x42; *(uint32_t*)0x2000004c = 0; *(uint32_t*)0x20000050 = 0; *(uint32_t*)0x20000058 = -1; *(uint32_t*)0x2000005c = 0; *(uint32_t*)0x20000060 = 0; *(uint32_t*)0x20000064 = 0; syz_io_uring_setup(0x74bc, 0x20000040, 0x20ffb000, 0x20ffc000, 0, 0); } int main(void) { mmap((void *)0x1ffff000ul, 0x1000ul, 0ul, MAP_ANON|MAP_PRIVATE, -1, 0ul); mmap((void *)0x20000000ul, 0x1000000ul, 7ul, MAP_ANON|MAP_PRIVATE, -1, 0ul); mmap((void *)0x21000000ul, 0x1000ul, 0ul, MAP_ANON|MAP_PRIVATE, -1, 0ul); loop(); return 0; } liburing-2.6/test/sqpoll-exit-hang.c000066400000000000000000000027311461424365000175230ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test that we exit properly with SQPOLL and having a request that * adds a circular reference to the ring itself. 
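 * The circular reference comes from the POLL_ADD submitted below
 * against the ring's own file descriptor: the ring then holds a
 * request that pins the ring, and teardown must still complete.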
*/ #include #include #include #include #include #include #include "liburing.h" static unsigned long long mtime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000; usec /= 1000; return sec + usec; } static unsigned long long mtime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return mtime_since(tv, &end); } int main(int argc, char *argv[]) { struct io_uring_params p = {}; struct timeval tv; struct io_uring ring; struct io_uring_sqe *sqe; int ret; if (argc > 1) return 0; p.flags = IORING_SETUP_SQPOLL; p.sq_thread_idle = 100; ret = io_uring_queue_init_params(1, &ring, &p); if (ret) { if (geteuid()) { printf("%s: skipped, not root\n", argv[0]); return 0; } fprintf(stderr, "queue_init=%d\n", ret); return 1; } if (!(p.features & IORING_FEAT_SQPOLL_NONFIXED)) { fprintf(stdout, "Skipping\n"); return 0; } sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, ring.ring_fd, POLLIN); io_uring_submit(&ring); gettimeofday(&tv, NULL); do { usleep(1000); } while (mtime_since_now(&tv) < 1000); return 0; } liburing-2.6/test/sqpoll-sleep.c000066400000000000000000000024231461424365000167450ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test that the sqthread goes to sleep around the specified time, and that * the NEED_WAKEUP flag is then set. */ #include #include #include #include #include #include "liburing.h" static unsigned long long mtime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000; usec /= 1000; return sec + usec; } static unsigned long long mtime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return mtime_since(tv, &end); } int main(int argc, char *argv[]) { struct io_uring_params p = {}; struct timeval tv; struct io_uring ring; int ret; if (argc > 1) return 0; p.flags = IORING_SETUP_SQPOLL; p.sq_thread_idle = 100; ret = io_uring_queue_init_params(1, &ring, &p); if (ret) { if (geteuid()) { printf("%s: skipped, not root\n", argv[0]); return 0; } fprintf(stderr, "queue_init=%d\n", ret); return 1; } gettimeofday(&tv, NULL); do { usleep(1000); if ((*ring.sq.kflags) & IORING_SQ_NEED_WAKEUP) return 0; } while (mtime_since_now(&tv) < 1000); return 1; } liburing-2.6/test/statx.c000066400000000000000000000064411461424365000154740ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various statx(2) tests * */ #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #ifdef __NR_statx static int do_statx(int dfd, const char *path, int flags, unsigned mask, struct statx *statxbuf) { return syscall(__NR_statx, dfd, path, flags, mask, statxbuf); } #else static int do_statx(int dfd, const char *path, int flags, unsigned mask, struct statx *statxbuf) { errno = ENOSYS; return -1; } #endif static int statx_syscall_supported(void) { return errno == ENOSYS ? 
0 : -1; } static int test_statx(struct io_uring *ring, const char *path) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct statx x1 = { }, x2 = { }; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_statx(sqe, -1, path, 0, STATX_ALL, &x1); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret) return ret; ret = do_statx(-1, path, 0, STATX_ALL, &x2); if (ret < 0) return statx_syscall_supported(); if (memcmp(&x1, &x2, sizeof(x1))) { fprintf(stderr, "Miscompare between io_uring and statx\n"); goto err; } return 0; err: return -1; } static int test_statx_fd(struct io_uring *ring, const char *path) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct statx x1, x2; int ret, fd; fd = open(path, O_RDONLY); if (fd < 0) { perror("open"); return 1; } memset(&x1, 0, sizeof(x1)); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_statx(sqe, fd, "", AT_EMPTY_PATH, STATX_ALL, &x1); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret) return ret; memset(&x2, 0, sizeof(x2)); ret = do_statx(fd, "", AT_EMPTY_PATH, STATX_ALL, &x2); if (ret < 0) return statx_syscall_supported(); if (memcmp(&x1, &x2, sizeof(x1))) { fprintf(stderr, "Miscompare between io_uring and statx\n"); goto err; } return 0; err: return -1; } int main(int argc, char *argv[]) { struct io_uring ring; const char *fname; int ret; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } if (argc > 1) { fname = argv[1]; } else { fname = "/tmp/.statx"; t_create_file(fname, 4096); } ret = test_statx(&ring, fname); if (ret) { if (ret == -EINVAL) { fprintf(stdout, "statx not supported, skipping\n"); goto done; } fprintf(stderr, "test_statx failed: %d\n", ret); goto err; } ret = test_statx_fd(&ring, fname); if (ret) { fprintf(stderr, "test_statx_fd failed: %d\n", ret); goto err; } done: if (fname != argv[1]) unlink(fname); return 0; err: if (fname != argv[1]) unlink(fname); return 1; } liburing-2.6/test/stdout.c000066400000000000000000000115241461424365000156510ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: check that STDOUT write works */ #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static int test_pipe_io_fixed(struct io_uring *ring) { const char str[] = "This is a fixed pipe test\n"; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct iovec vecs[2]; char buffer[128]; int i, ret, fds[2]; t_posix_memalign(&vecs[0].iov_base, 4096, 4096); memcpy(vecs[0].iov_base, str, strlen(str)); vecs[0].iov_len = strlen(str); if (pipe(fds) < 0) { perror("pipe"); return 1; } ret = io_uring_register_buffers(ring, vecs, 1); if (ret) { fprintf(stderr, "Failed to register buffers: %d\n", ret); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_write_fixed(sqe, fds[1], vecs[0].iov_base, vecs[0].iov_len, 0, 0); sqe->user_data = 1; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); 
goto err; } vecs[1].iov_base = buffer; vecs[1].iov_len = sizeof(buffer); io_uring_prep_readv(sqe, fds[0], &vecs[1], 1, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } else if (ret != 2) { fprintf(stderr, "Submitted only %d\n", ret); goto err; } for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res < 0) { fprintf(stderr, "I/O write error on %lu: %s\n", (unsigned long) cqe->user_data, strerror(-cqe->res)); goto err; } if (cqe->res != strlen(str)) { fprintf(stderr, "Got %d bytes, wanted %d on %lu\n", cqe->res, (int)strlen(str), (unsigned long) cqe->user_data); goto err; } if (cqe->user_data == 2 && memcmp(str, buffer, strlen(str))) { fprintf(stderr, "read data mismatch\n"); goto err; } io_uring_cqe_seen(ring, cqe); } io_uring_unregister_buffers(ring); return 0; err: return 1; } static int test_stdout_io_fixed(struct io_uring *ring) { const char str[] = "This is a fixed pipe test\n"; struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct iovec vecs; int ret; t_posix_memalign(&vecs.iov_base, 4096, 4096); memcpy(vecs.iov_base, str, strlen(str)); vecs.iov_len = strlen(str); ret = io_uring_register_buffers(ring, &vecs, 1); if (ret) { fprintf(stderr, "Failed to register buffers: %d\n", ret); return 1; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_write_fixed(sqe, STDOUT_FILENO, vecs.iov_base, vecs.iov_len, 0, 0); ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } else if (ret < 1) { fprintf(stderr, "Submitted only %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res < 0) { fprintf(stderr, "STDOUT write error: %s\n", strerror(-cqe->res)); goto err; } if (cqe->res != vecs.iov_len) { fprintf(stderr, "Got %d write, wanted %d\n", cqe->res, (int)vecs.iov_len); goto err; } io_uring_cqe_seen(ring, cqe); io_uring_unregister_buffers(ring); return 0; err: return 1; } static int test_stdout_io(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct iovec vecs; int ret; vecs.iov_base = "This is a pipe test\n"; vecs.iov_len = strlen(vecs.iov_base); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_writev(sqe, STDOUT_FILENO, &vecs, 1, 0); ret = io_uring_submit(ring); if (ret < 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } else if (ret < 1) { fprintf(stderr, "Submitted only %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } if (cqe->res < 0) { fprintf(stderr, "STDOUT write error: %s\n", strerror(-cqe->res)); goto err; } if (cqe->res != vecs.iov_len) { fprintf(stderr, "Got %d write, wanted %d\n", cqe->res, (int)vecs.iov_len); goto err; } io_uring_cqe_seen(ring, cqe); return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return 0; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } ret = test_stdout_io(&ring); if (ret) { fprintf(stderr, "test_pipe_io failed\n"); return ret; } ret = test_stdout_io_fixed(&ring); if (ret) { fprintf(stderr, "test_pipe_io_fixed failed\n"); return ret; } ret = test_pipe_io_fixed(&ring); if (ret) { fprintf(stderr, 
"test_pipe_io_fixed failed\n"); return ret; } return 0; } liburing-2.6/test/submit-and-wait.c000066400000000000000000000037271461424365000173420ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Test that io_uring_submit_and_wait_timeout() returns the * right value (submit count) and that it doesn't end up waiting twice. * */ #include #include #include #include #include #include #include #include "liburing.h" #include "test.h" static unsigned long long mtime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000; usec /= 1000; return sec + usec; } static unsigned long long mtime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return mtime_since(tv, &end); } static int test(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; struct timeval tv; int ret, i; for (i = 0; i < 1; i++) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed at %d\n", i); goto err; } io_uring_prep_nop(sqe); } ts.tv_sec = 1; ts.tv_nsec = 0; gettimeofday(&tv, NULL); ret = io_uring_submit_and_wait_timeout(ring, &cqe, 2, &ts, NULL); if (ret < 0) { fprintf(stderr, "submit_and_wait_timeout: %d\n", ret); goto err; } ret = mtime_since_now(&tv); /* allow some slack, should be around 1s */ if (ret > 1200) { fprintf(stderr, "wait took too long: %d\n", ret); goto err; } return 0; err: return 1; } static int test_ring(void) { struct io_uring ring; struct io_uring_params p = { }; int ret; p.flags = 0; ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } ret = test(&ring); if (ret) { fprintf(stderr, "test failed\n"); goto err; } err: io_uring_queue_exit(&ring); return ret; } int main(int argc, char *argv[]) { if (argc > 1) return 0; return test_ring(); } liburing-2.6/test/submit-link-fail.c000066400000000000000000000066221461424365000175010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: tests linked requests failing during submission */ #include #include #include #include #include #include #include #include "liburing.h" #define DRAIN_USER_DATA 42 static int test_underprep_fail(bool hardlink, bool drain, bool link_last, int link_size, int fail_idx) { const int invalid_fd = 42; int link_flags = IOSQE_IO_LINK; int total_submit = link_size; struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; char buffer[1] = { }; int i, ret, fds[2]; if (drain) link_flags |= IOSQE_IO_DRAIN; if (hardlink) link_flags |= IOSQE_IO_HARDLINK; assert(fail_idx < link_size); assert(link_size < 40); /* create a new ring as it leaves it dirty */ ret = io_uring_queue_init(8, &ring, 0); if (ret) { printf("ring setup failed\n"); return -1; } if (pipe(fds)) { perror("pipe"); return -1; } if (drain) { /* clog drain, so following reqs sent to draining */ sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fds[0], buffer, sizeof(buffer), 0); sqe->user_data = DRAIN_USER_DATA; sqe->flags |= IOSQE_IO_DRAIN; total_submit++; } for (i = 0; i < link_size; i++) { sqe = io_uring_get_sqe(&ring); if (i == fail_idx) { io_uring_prep_read(sqe, invalid_fd, buffer, 1, 0); sqe->ioprio = (short) -1; } else { io_uring_prep_nop(sqe); } if (i != link_size - 1 || !link_last) sqe->flags |= link_flags; sqe->user_data = i; } ret = io_uring_submit(&ring); if (ret != total_submit) { /* Old 
behaviour, failed early and under-submitted */ if (ret == fail_idx + 1 + drain) goto out; fprintf(stderr, "submit failed: %d\n", ret); return -1; } if (drain) { /* unclog drain */ ret = write(fds[1], buffer, sizeof(buffer)); if (ret < 0) { perror("write"); return 1; } } for (i = 0; i < total_submit; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } ret = cqe->res; if (cqe->user_data == DRAIN_USER_DATA) { if (ret != 1) { fprintf(stderr, "drain failed %d\n", ret); return 1; } } else if (cqe->user_data == fail_idx) { if (ret == 0 || ret == -ECANCELED) { fprintf(stderr, "half-prep req unexpected return %d\n", ret); return 1; } } else { if (ret != -ECANCELED) { fprintf(stderr, "cancel failed %d, ud %d\n", ret, (int)cqe->user_data); return 1; } } io_uring_cqe_seen(&ring, cqe); } out: close(fds[0]); close(fds[1]); io_uring_queue_exit(&ring); return 0; } int main(int argc, char *argv[]) { int ret, link_size, fail_idx, i; if (argc > 1) return 0; /* * hardlink, size=3, fail_idx=1, drain=false -- kernel fault * link, size=3, fail_idx=0, drain=true -- kernel fault * link, size=3, fail_idx=1, drain=true -- invalid cqe->res */ for (link_size = 0; link_size < 3; link_size++) { for (fail_idx = 0; fail_idx < link_size; fail_idx++) { for (i = 0; i < 8; i++) { bool hardlink = (i & 1) != 0; bool drain = (i & 2) != 0; bool link_last = (i & 4) != 0; ret = test_underprep_fail(hardlink, drain, link_last, link_size, fail_idx); if (!ret) continue; fprintf(stderr, "failed %d, hard %d, drain %d," "link_last %d, size %d, idx %d\n", ret, hardlink, drain, link_last, link_size, fail_idx); return 1; } } } return 0; } liburing-2.6/test/submit-reuse.c000066400000000000000000000104261461424365000167530ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test reads that will punt to blocking context, with immediate overwrite * of iovec->iov_base to NULL. If the kernel doesn't properly handle * reuse of the iovec, we should get -EFAULT. */ #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define STR_SIZE 32768 #define FILE_SIZE 65536 struct thread_data { int fd1, fd2; volatile int do_exit; }; static void *flusher(void *__data) { struct thread_data *data = __data; while (!data->do_exit) { posix_fadvise(data->fd1, 0, FILE_SIZE, POSIX_FADV_DONTNEED); posix_fadvise(data->fd2, 0, FILE_SIZE, POSIX_FADV_DONTNEED); usleep(10); } return NULL; } static char str1[STR_SIZE]; static char str2[STR_SIZE]; static struct io_uring ring; static int no_stable; static int prep(int fd, char *str, int split, int async) { struct io_uring_sqe *sqe; struct iovec iovs[16]; int ret, i; if (split) { int vsize = STR_SIZE / 16; void *ptr = str; for (i = 0; i < 16; i++) { iovs[i].iov_base = ptr; iovs[i].iov_len = vsize; ptr += vsize; } } else { iovs[0].iov_base = str; iovs[0].iov_len = STR_SIZE; } sqe = io_uring_get_sqe(&ring); io_uring_prep_readv(sqe, fd, iovs, split ? 
16 : 1, 0); sqe->user_data = fd; if (async) sqe->flags = IOSQE_ASYNC; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "submit got %d\n", ret); return 1; } if (split) { for (i = 0; i < 16; i++) iovs[i].iov_base = NULL; } else { iovs[0].iov_base = NULL; } return 0; } static int wait_nr(int nr) { int i, ret; for (i = 0; i < nr; i++) { struct io_uring_cqe *cqe; ret = io_uring_wait_cqe(&ring, &cqe); if (ret) return ret; if (cqe->res < 0) { fprintf(stderr, "cqe->res=%d\n", cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); } return 0; } static unsigned long long mtime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000; usec /= 1000; return sec + usec; } static unsigned long long mtime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return mtime_since(tv, &end); } static int test_reuse(int argc, char *argv[], int split, int async) { struct thread_data data; struct io_uring_params p = { }; int fd1, fd2, ret, i; struct timeval tv; pthread_t thread; char *fname1 = ".reuse.1"; int do_unlink = 1; void *tret; ret = io_uring_queue_init_params(32, &ring, &p); if (ret) { fprintf(stderr, "io_uring_queue_init: %d\n", ret); return 1; } if (!(p.features & IORING_FEAT_SUBMIT_STABLE)) { fprintf(stdout, "FEAT_SUBMIT_STABLE not there, skipping\n"); io_uring_queue_exit(&ring); no_stable = 1; return 0; } if (argc > 1) { fname1 = argv[1]; do_unlink = 0; } else { t_create_file(fname1, FILE_SIZE); } fd1 = open(fname1, O_RDONLY); if (do_unlink) unlink(fname1); if (fd1 < 0) { perror("open fname1"); goto err; } t_create_file(".reuse.2", FILE_SIZE); fd2 = open(".reuse.2", O_RDONLY); unlink(".reuse.2"); if (fd2 < 0) { perror("open .reuse.2"); goto err; } data.fd1 = fd1; data.fd2 = fd2; data.do_exit = 0; pthread_create(&thread, NULL, flusher, &data); usleep(10000); gettimeofday(&tv, NULL); for (i = 0; i < 1000; i++) { ret = prep(fd1, str1, split, async); if (ret) { fprintf(stderr, "prep1 failed: %d\n", ret); goto err; } ret = prep(fd2, str2, split, async); if (ret) { fprintf(stderr, "prep1 failed: %d\n", ret); goto err; } ret = wait_nr(2); if (ret) { fprintf(stderr, "wait_nr: %d\n", ret); goto err; } if (mtime_since_now(&tv) > 5000) break; } data.do_exit = 1; pthread_join(thread, &tret); close(fd2); close(fd1); io_uring_queue_exit(&ring); return 0; err: io_uring_queue_exit(&ring); return 1; } int main(int argc, char *argv[]) { int ret, i; for (i = 0; i < 4; i++) { int split, async; split = (i & 1) != 0; async = (i & 2) != 0; ret = test_reuse(argc, argv, split, async); if (ret) { fprintf(stderr, "test_reuse %d %d failed\n", split, async); return ret; } if (no_stable) break; } return 0; } liburing-2.6/test/symlink.c000066400000000000000000000046401461424365000160160ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring symlinkat handling */ #include #include #include #include #include #include #include "liburing.h" static int do_symlinkat(struct io_uring *ring, const char *oldname, const char *newname) { int ret; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "sqe get failed\n"); goto err; } io_uring_prep_symlinkat(sqe, oldname, AT_FDCWD, newname); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqes(ring, &cqe, 1, 0, 0); if (ret) { fprintf(stderr, 
"wait_cqe failed: %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; err: return 1; } static int test_link_contents(const char* linkname, const char *expected_contents) { char buf[128]; int ret = readlink(linkname, buf, 127); if (ret < 0) { perror("readlink"); return ret; } buf[ret] = 0; if (strncmp(buf, expected_contents, 128)) { fprintf(stderr, "link contents differs from expected: '%s' vs '%s'", buf, expected_contents); return -1; } return 0; } int main(int argc, char *argv[]) { static const char target[] = "io_uring-symlinkat-test-target"; static const char linkname[] = "io_uring-symlinkat-test-link"; int ret; struct io_uring ring; if (argc > 1) return 0; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "queue init failed: %d\n", ret); return ret; } ret = do_symlinkat(&ring, target, linkname); if (ret < 0) { if (ret == -EBADF || ret == -EINVAL) { fprintf(stdout, "symlinkat not supported, skipping\n"); goto out; } fprintf(stderr, "symlinkat: %s\n", strerror(-ret)); goto err; } else if (ret) { goto err; } ret = test_link_contents(linkname, target); if (ret < 0) goto err1; ret = do_symlinkat(&ring, target, linkname); if (ret != -EEXIST) { fprintf(stderr, "test_symlinkat linkname already exists failed: %d\n", ret); goto err1; } ret = do_symlinkat(&ring, target, "surely/this/does/not/exist"); if (ret != -ENOENT) { fprintf(stderr, "test_symlinkat no parent failed: %d\n", ret); goto err1; } out: unlinkat(AT_FDCWD, linkname, 0); io_uring_queue_exit(&ring); return 0; err1: unlinkat(AT_FDCWD, linkname, 0); err: io_uring_queue_exit(&ring); return 1; } liburing-2.6/test/sync-cancel.c000066400000000000000000000117061461424365000165300ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test io_uring_register_sync_cancel() * */ #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" static int no_sync_cancel; static int test_sync_cancel_timeout(struct io_uring *ring, int async) { struct io_uring_sync_cancel_reg reg = { }; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, fds[2], to_prep; char buf[32]; if (pipe(fds) < 0) { perror("pipe"); return 1; } to_prep = 1; sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); sqe->user_data = 0x89; if (async) sqe->flags |= IOSQE_ASYNC; ret = io_uring_submit(ring); if (ret != to_prep) { fprintf(stderr, "submit=%d\n", ret); return 1; } usleep(10000); reg.addr = 0x89; reg.timeout.tv_nsec = 1; ret = io_uring_register_sync_cancel(ring, ®); if (async) { /* we expect -ETIME here, but can race and get 0 */ if (ret != -ETIME && ret != 0) { fprintf(stderr, "sync_cancel=%d\n", ret); return 1; } } else { if (ret < 0) { fprintf(stderr, "sync_cancel=%d\n", ret); return 1; } } /* * we could _almost_ use peek_cqe() here, but there is still * a small gap where io-wq is done with the request and on * its way to posting a completion, but hasn't done it just * yet. the request is canceled and won't be doing any IO * to buffers etc, but the cqe may not have quite arrived yet. 
*/ ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "peek=%d\n", ret); return 1; } if (cqe->res >= 0) { fprintf(stderr, "cqe->res=%d\n", cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); return 0; } static int test_sync_cancel(struct io_uring *ring, int async, int nr_all, int use_fd) { struct io_uring_sync_cancel_reg reg = { }; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, fds[2], to_prep, i; char buf[32]; if (pipe(fds) < 0) { perror("pipe"); return 1; } to_prep = 1; if (nr_all) to_prep = 4; for (i = 0; i < to_prep; i++) { sqe = io_uring_get_sqe(ring); io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); sqe->user_data = 0x89; if (async) sqe->flags |= IOSQE_ASYNC; } ret = io_uring_submit(ring); if (ret != to_prep) { fprintf(stderr, "submit=%d\n", ret); return 1; } usleep(10000); if (!use_fd) reg.addr = 0x89; else reg.fd = fds[0]; reg.timeout.tv_sec = 200; if (nr_all) reg.flags |= IORING_ASYNC_CANCEL_ALL; if (use_fd) reg.flags |= IORING_ASYNC_CANCEL_FD; ret = io_uring_register_sync_cancel(ring, ®); if (ret < 0) { if (ret == -EINVAL && !no_sync_cancel) { no_sync_cancel = 1; return 0; } fprintf(stderr, "sync_cancel=%d\n", ret); return 1; } for (i = 0; i < to_prep; i++) { /* * we could _almost_ use peek_cqe() here, but there is still * a small gap where io-wq is done with the request and on * its way to posting a completion, but hasn't done it just * yet. the request is canceled and won't be doing any IO * to buffers etc, but the cqe may not have quite arrived yet. */ ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "peek=%d\n", ret); return 1; } if (cqe->res >= 0) { fprintf(stderr, "cqe->res=%d\n", cqe->res); return 1; } io_uring_cqe_seen(ring, cqe); } return 0; } int main(int argc, char *argv[]) { struct io_uring ring; int ret; if (argc > 1) return T_EXIT_SKIP; ret = t_create_ring(7, &ring, 0); if (ret == T_SETUP_SKIP) return T_EXIT_SKIP; else if (ret != T_SETUP_OK) return ret; ret = test_sync_cancel(&ring, 0, 0, 0); if (ret) { fprintf(stderr, "test_sync_cancel 0 0 0 failed\n"); return T_EXIT_FAIL; } if (no_sync_cancel) return T_EXIT_SKIP; ret = test_sync_cancel(&ring, 1, 0, 0); if (ret) { fprintf(stderr, "test_sync_cancel 1 0 0 failed\n"); return T_EXIT_FAIL; } ret = test_sync_cancel(&ring, 0, 1, 0); if (ret) { fprintf(stderr, "test_sync_cancel 0 1 0 failed\n"); return T_EXIT_FAIL; } ret = test_sync_cancel(&ring, 1, 1, 0); if (ret) { fprintf(stderr, "test_sync_cancel 1 1 0 failed\n"); return T_EXIT_FAIL; } ret = test_sync_cancel(&ring, 0, 0, 1); if (ret) { fprintf(stderr, "test_sync_cancel 0 0 1 failed\n"); return T_EXIT_FAIL; } ret = test_sync_cancel(&ring, 1, 0, 1); if (ret) { fprintf(stderr, "test_sync_cancel 1 0 1 failed\n"); return T_EXIT_FAIL; } ret = test_sync_cancel(&ring, 0, 1, 1); if (ret) { fprintf(stderr, "test_sync_cancel 0 1 1 failed\n"); return T_EXIT_FAIL; } ret = test_sync_cancel(&ring, 1, 1, 1); if (ret) { fprintf(stderr, "test_sync_cancel 1 1 1 failed\n"); return T_EXIT_FAIL; } ret = test_sync_cancel_timeout(&ring, 0); if (ret) { fprintf(stderr, "test_sync_cancel_timeout 0\n"); return T_EXIT_FAIL; } /* must be last, leaves request */ ret = test_sync_cancel_timeout(&ring, 1); if (ret) { fprintf(stderr, "test_sync_cancel_timeout 1\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/teardowns.c000066400000000000000000000015011461424365000163270ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include "liburing.h" static void 
loop(void) { int i, ret = 0; for (i = 0; i < 100; i++) { struct io_uring ring; int fd; memset(&ring, 0, sizeof(ring)); fd = io_uring_queue_init(0xa4, &ring, 0); if (fd >= 0) { close(fd); continue; } if (fd != -ENOMEM) ret++; } exit(ret); } int main(int argc, char *argv[]) { int i, ret, status; if (argc > 1) return 0; for (i = 0; i < 12; i++) { if (!fork()) { loop(); break; } } ret = 0; for (i = 0; i < 12; i++) { if (waitpid(-1, &status, 0) < 0) { perror("waitpid"); return 1; } if (WEXITSTATUS(status)) ret++; } return ret; } liburing-2.6/test/test.h000066400000000000000000000015421461424365000153120ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: Test configs for tests. */ #ifndef LIBURING_TEST_H #define LIBURING_TEST_H #ifdef __cplusplus extern "C" { #endif typedef struct io_uring_test_config { unsigned int flags; const char *description; } io_uring_test_config; __attribute__((__unused__)) static io_uring_test_config io_uring_test_configs[] = { { 0, "default" }, { IORING_SETUP_SQE128, "large SQE"}, { IORING_SETUP_CQE32, "large CQE"}, { IORING_SETUP_SQE128 | IORING_SETUP_CQE32, "large SQE/CQE" }, }; #define FOR_ALL_TEST_CONFIGS \ for (int i = 0; i < sizeof(io_uring_test_configs) / sizeof(io_uring_test_configs[0]); i++) #define IORING_GET_TEST_CONFIG_FLAGS() (io_uring_test_configs[i].flags) #define IORING_GET_TEST_CONFIG_DESCRIPTION() (io_uring_test_configs[i].description) #ifdef __cplusplus } #endif #endif liburing-2.6/test/thread-exit.c000066400000000000000000000046661461424365000165560ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test that thread pool issued requests don't cancel on thread * exit, but do get canceled once the parent exits. Do both * writes that finish and a poll request that sticks around. 
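 * Each request is issued from a pthread that is joined immediately
 * after submitting; the parent only reaps the NR_IOS write CQEs,
 * while the pipe poll never triggers and is left to be canceled at
 * ring/process teardown.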
* */ #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #define NR_IOS 8 #define WSIZE 512 struct d { int fd; struct io_uring *ring; unsigned long off; int pipe_fd; int err; int i; }; static char *g_buf[NR_IOS] = {NULL}; static void free_g_buf(void) { int i; for (i = 0; i < NR_IOS; i++) free(g_buf[i]); } static void *do_io(void *data) { struct d *d = data; struct io_uring_sqe *sqe; char *buffer; int ret; buffer = t_malloc(WSIZE); g_buf[d->i] = buffer; memset(buffer, 0x5a, WSIZE); sqe = io_uring_get_sqe(d->ring); if (!sqe) { d->err++; return NULL; } io_uring_prep_write(sqe, d->fd, buffer, WSIZE, d->off); sqe->user_data = d->off; sqe = io_uring_get_sqe(d->ring); if (!sqe) { d->err++; return NULL; } io_uring_prep_poll_add(sqe, d->pipe_fd, POLLIN); ret = io_uring_submit(d->ring); if (ret != 2) d->err++; return NULL; } int main(int argc, char *argv[]) { struct io_uring ring; const char *fname; pthread_t thread; int ret, do_unlink, i, fd; struct d d; int fds[2]; if (pipe(fds) < 0) { perror("pipe"); return 1; } ret = io_uring_queue_init(32, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } if (argc > 1) { fname = argv[1]; do_unlink = 0; } else { fname = ".thread.exit"; do_unlink = 1; t_create_file(fname, 4096); } fd = open(fname, O_WRONLY); if (do_unlink) unlink(fname); if (fd < 0) { perror("open"); return 1; } d.fd = fd; d.ring = ˚ d.off = 0; d.pipe_fd = fds[0]; d.err = 0; for (i = 0; i < NR_IOS; i++) { d.i = i; memset(&thread, 0, sizeof(thread)); pthread_create(&thread, NULL, do_io, &d); pthread_join(thread, NULL); d.off += WSIZE; } for (i = 0; i < NR_IOS; i++) { struct io_uring_cqe *cqe; ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "io_uring_wait_cqe=%d\n", ret); goto err; } if (cqe->res != WSIZE) { fprintf(stderr, "cqe->res=%d, Expected %d\n", cqe->res, WSIZE); goto err; } io_uring_cqe_seen(&ring, cqe); } free_g_buf(); return d.err; err: free_g_buf(); return 1; } liburing-2.6/test/timeout-new.c000066400000000000000000000131061461424365000166020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: tests for getevents timeout * */ #include #include #include #include #include "liburing.h" #define TIMEOUT_MSEC 200 #define TIMEOUT_SEC 10 static int thread_ret0, thread_ret1; static int cnt = 0; static pthread_mutex_t mutex; static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec) { ts->tv_sec = msec / 1000; ts->tv_nsec = (msec % 1000) * 1000000; } static unsigned long long mtime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000; usec /= 1000; return sec + usec; } static unsigned long long mtime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return mtime_since(tv, &end); } static int test_return_before_timeout(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; bool retried = false; struct __kernel_timespec ts; msec_to_ts(&ts, TIMEOUT_MSEC); sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); return 1; } again: ret = io_uring_wait_cqe_timeout(ring, &cqe, &ts); if (ret == -ETIME && (ring->flags & IORING_SETUP_SQPOLL) && !retried) { /* * there is a small chance SQPOLL hasn't been waked up yet, * give it one more try. 
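	 * (the NOP may still be sitting in the SQ ring unprocessed, in
	 * which case no CQE exists yet and the timed wait hits -ETIME)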
*/ printf("warning: funky SQPOLL timing\n"); sleep(1); retried = true; goto again; } else if (ret < 0) { fprintf(stderr, "%s: timeout error: %d\n", __FUNCTION__, ret); return 1; } io_uring_cqe_seen(ring, cqe); return 0; } static int test_return_after_timeout(struct io_uring *ring) { struct io_uring_cqe *cqe; int ret; struct __kernel_timespec ts; struct timeval tv; unsigned long long exp; msec_to_ts(&ts, TIMEOUT_MSEC); gettimeofday(&tv, NULL); ret = io_uring_wait_cqe_timeout(ring, &cqe, &ts); exp = mtime_since_now(&tv); if (ret != -ETIME) { fprintf(stderr, "%s: timeout error: %d\n", __FUNCTION__, ret); return 1; } if (exp < TIMEOUT_MSEC / 2 || exp > (TIMEOUT_MSEC * 3) / 2) { fprintf(stderr, "%s: Timeout seems wonky (got %llu)\n", __FUNCTION__, exp); return 1; } return 0; } static int __reap_thread_fn(void *data) { struct io_uring *ring = (struct io_uring *)data; struct io_uring_cqe *cqe; struct __kernel_timespec ts; msec_to_ts(&ts, TIMEOUT_SEC); pthread_mutex_lock(&mutex); cnt++; pthread_mutex_unlock(&mutex); return io_uring_wait_cqe_timeout(ring, &cqe, &ts); } static void *reap_thread_fn0(void *data) { thread_ret0 = __reap_thread_fn(data); return NULL; } static void *reap_thread_fn1(void *data) { thread_ret1 = __reap_thread_fn(data); return NULL; } /* * This is to test issuing a sqe in main thread and reaping it in two child-thread * at the same time. To see if timeout feature works or not. */ static int test_multi_threads_timeout(void) { struct io_uring ring; int ret; bool both_wait = false; pthread_t reap_thread0, reap_thread1; struct io_uring_sqe *sqe; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "%s: ring setup failed: %d\n", __FUNCTION__, ret); return 1; } pthread_create(&reap_thread0, NULL, reap_thread_fn0, &ring); pthread_create(&reap_thread1, NULL, reap_thread_fn1, &ring); /* * make two threads both enter io_uring_wait_cqe_timeout() before issuing the sqe * as possible as we can. So that there are two threads in the ctx->wait queue. * In this way, we can test if a cqe wakes up two threads at the same time. 
*/ while(!both_wait) { pthread_mutex_lock(&mutex); if (cnt == 2) both_wait = true; pthread_mutex_unlock(&mutex); sleep(1); } sqe = io_uring_get_sqe(&ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_nop(sqe); ret = io_uring_submit(&ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } pthread_join(reap_thread0, NULL); pthread_join(reap_thread1, NULL); if ((thread_ret0 && thread_ret0 != -ETIME) || (thread_ret1 && thread_ret1 != -ETIME)) { fprintf(stderr, "%s: thread wait cqe timeout failed: %d %d\n", __FUNCTION__, thread_ret0, thread_ret1); goto err; } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring_normal, ring_sq; int ret; if (argc > 1) return 0; ret = io_uring_queue_init(8, &ring_normal, 0); if (ret) { fprintf(stderr, "ring_normal setup failed: %d\n", ret); return 1; } if (!(ring_normal.features & IORING_FEAT_EXT_ARG)) { fprintf(stderr, "feature IORING_FEAT_EXT_ARG not supported, skipping.\n"); return 0; } ret = test_return_before_timeout(&ring_normal); if (ret) { fprintf(stderr, "ring_normal: test_return_before_timeout failed\n"); return ret; } ret = test_return_after_timeout(&ring_normal); if (ret) { fprintf(stderr, "ring_normal: test_return_after_timeout failed\n"); return ret; } ret = io_uring_queue_init(8, &ring_sq, IORING_SETUP_SQPOLL); if (ret) { fprintf(stderr, "ring_sq setup failed: %d\n", ret); return 1; } ret = test_return_before_timeout(&ring_sq); if (ret) { fprintf(stderr, "ring_sq: test_return_before_timeout failed\n"); return ret; } ret = test_return_after_timeout(&ring_sq); if (ret) { fprintf(stderr, "ring_sq: test_return_after_timeout failed\n"); return ret; } ret = test_multi_threads_timeout(); if (ret) { fprintf(stderr, "test_multi_threads_timeout failed\n"); return ret; } return 0; } liburing-2.6/test/timeout.c000066400000000000000000001140141461424365000160130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various timeout tests * */ #include #include #include #include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" #include "../src/syscall.h" #define TIMEOUT_MSEC 200 static int not_supported; static int no_modify; static int no_multishot; static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec) { ts->tv_sec = msec / 1000; ts->tv_nsec = (msec % 1000) * 1000000; } static unsigned long long mtime_since(const struct timeval *s, const struct timeval *e) { long long sec, usec; sec = e->tv_sec - s->tv_sec; usec = (e->tv_usec - s->tv_usec); if (sec > 0 && usec < 0) { sec--; usec += 1000000; } sec *= 1000; usec /= 1000; return sec + usec; } static unsigned long long mtime_since_now(struct timeval *tv) { struct timeval end; gettimeofday(&end, NULL); return mtime_since(tv, &end); } /* * Test that we return to userspace if a timeout triggers, even if we * don't satisfy the number of events asked for. 
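 *
 * The third argument of io_uring_prep_timeout() is the completion count:
 * 0 means the request is a pure timer. A sketch of the setup this test
 * relies on:
 *
 *	struct __kernel_timespec ts;
 *	msec_to_ts(&ts, TIMEOUT_MSEC);
 *	io_uring_prep_timeout(sqe, &ts, 0, 0);	// count 0, no flags
 *
 * Waiting for 4 events must still return once the timer fires, with the
 * timeout CQE carrying res == -ETIME.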
*/ static int test_single_timeout_many(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; unsigned long long exp; struct __kernel_timespec ts; struct timeval tv; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, TIMEOUT_MSEC); io_uring_prep_timeout(sqe, &ts, 0, 0); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } gettimeofday(&tv, NULL); ret = __sys_io_uring_enter(ring->ring_fd, 0, 4, IORING_ENTER_GETEVENTS, NULL); if (ret < 0) { fprintf(stderr, "%s: io_uring_enter %d\n", __FUNCTION__, ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret == -EINVAL) { fprintf(stdout, "Timeout not supported, ignored\n"); not_supported = 1; return 0; } else if (ret != -ETIME) { fprintf(stderr, "Timeout: %s\n", strerror(-ret)); goto err; } exp = mtime_since_now(&tv); if (exp >= TIMEOUT_MSEC / 2 && exp <= (TIMEOUT_MSEC * 3) / 2) return 0; fprintf(stderr, "%s: Timeout seems wonky (got %llu)\n", __FUNCTION__, exp); err: return 1; } /* * Test numbered trigger of timeout */ static int test_single_timeout_nr(struct io_uring *ring, int nr) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; int i, ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, TIMEOUT_MSEC); io_uring_prep_timeout(sqe, &ts, nr, 0); sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); io_uring_sqe_set_data(sqe, (void *) 1); sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); io_uring_sqe_set_data(sqe, (void *) 1); ret = io_uring_submit_and_wait(ring, 3); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } i = 0; while (i < 3) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; /* * NOP commands have user_data as 1. Check that we get the * at least 'nr' NOPs first, then the successfully removed timeout. 
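 *
 * For nr == 2 the expected CQE stream is, in sketch form:
 *
 *	user_data == 1, res == 0	// first NOP
 *	user_data == 1, res == 0	// second NOP
 *	user_data == 0, res == 0	// timeout satisfied by the two events
 *
 * A timeout satisfied by completions reports res == 0; one that fires on
 * elapsed time alone reports res == -ETIME instead.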
*/ if (io_uring_cqe_get_data(cqe) == NULL) { if (i < nr) { fprintf(stderr, "%s: timeout received too early\n", __FUNCTION__); goto err; } if (ret) { fprintf(stderr, "%s: timeout triggered by passage of" " time, not by events completed\n", __FUNCTION__); goto err; } } io_uring_cqe_seen(ring, cqe); if (ret) { fprintf(stderr, "res: %d\n", ret); goto err; } i++; } return 0; err: return 1; } static int test_single_timeout_wait(struct io_uring *ring, struct io_uring_params *p) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; int i, ret; sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); io_uring_sqe_set_data(sqe, (void *) 1); sqe = io_uring_get_sqe(ring); io_uring_prep_nop(sqe); io_uring_sqe_set_data(sqe, (void *) 1); /* no implied submit for newer kernels */ if (p->features & IORING_FEAT_EXT_ARG) { ret = io_uring_submit(ring); if (ret != 2) { fprintf(stderr, "%s: submit %d\n", __FUNCTION__, ret); return 1; } } msec_to_ts(&ts, 1000); i = 0; do { ret = io_uring_wait_cqes(ring, &cqe, 2, &ts, NULL); if (ret == -ETIME) break; if (ret < 0) { fprintf(stderr, "%s: wait timeout failed: %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret < 0) { fprintf(stderr, "res: %d\n", ret); goto err; } i++; } while (1); if (i != 2) { fprintf(stderr, "got %d completions\n", i); goto err; } return 0; err: return 1; } /* * Test single timeout waking us up */ static int test_single_timeout(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; unsigned long long exp; struct __kernel_timespec ts; struct timeval tv; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, TIMEOUT_MSEC); io_uring_prep_timeout(sqe, &ts, 0, 0); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } gettimeofday(&tv, NULL); ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret == -EINVAL) { fprintf(stdout, "%s: Timeout not supported, ignored\n", __FUNCTION__); not_supported = 1; return 0; } else if (ret != -ETIME) { fprintf(stderr, "%s: Timeout: %s\n", __FUNCTION__, strerror(-ret)); goto err; } exp = mtime_since_now(&tv); if (exp >= TIMEOUT_MSEC / 2 && exp <= (TIMEOUT_MSEC * 3) / 2) return 0; fprintf(stderr, "%s: Timeout seems wonky (got %llu)\n", __FUNCTION__, exp); err: return 1; } static int test_single_timeout_remove_notfound(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; int ret, i; if (no_modify) return 0; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, TIMEOUT_MSEC); io_uring_prep_timeout(sqe, &ts, 2, 0); sqe->user_data = 1; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout_remove(sqe, 2, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } /* * We should get two completions. One is our modify request, which should * complete with -ENOENT. The other is the timeout that will trigger after * TIMEOUT_MSEC. 
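 *
 * In sketch form, with the user_data values assigned above:
 *
 *	user_data == 2 (remove), res == -ENOENT	// nothing to cancel
 *	user_data == 1 (timeout), res == -ETIME	// fires normally
 *
 * io_uring_prep_timeout_remove() is keyed on the user_data of the timeout
 * to cancel, and user_data 2 intentionally matches no pending timeout.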
*/ for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } if (cqe->user_data == 2) { if (cqe->res != -ENOENT) { fprintf(stderr, "%s: modify ret %d, wanted ENOENT\n", __FUNCTION__, cqe->res); break; } } else if (cqe->user_data == 1) { if (cqe->res != -ETIME) { fprintf(stderr, "%s: timeout ret %d, wanted -ETIME\n", __FUNCTION__, cqe->res); break; } } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } static int test_single_timeout_remove(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; int ret, i; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, TIMEOUT_MSEC); io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 1; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout_remove(sqe, 1, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } /* * We should have two completions ready. One is for the original timeout * request, user_data == 1, that should have a ret of -ECANCELED. The other * is for our modify request, user_data == 2, that should have a ret of 0. */ for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } if (no_modify) goto seen; if (cqe->res == -EINVAL && cqe->user_data == 2) { fprintf(stdout, "Timeout modify not supported, ignoring\n"); no_modify = 1; goto seen; } if (cqe->user_data == 1) { if (cqe->res != -ECANCELED) { fprintf(stderr, "%s: timeout ret %d, wanted canceled\n", __FUNCTION__, cqe->res); break; } } else if (cqe->user_data == 2) { if (cqe->res) { fprintf(stderr, "%s: modify ret %d, wanted 0\n", __FUNCTION__, cqe->res); break; } } seen: io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test single absolute timeout waking us up */ static int test_single_timeout_abs(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; unsigned long long exp; struct __kernel_timespec ts; struct timespec abs_ts; struct timeval tv; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } clock_gettime(CLOCK_MONOTONIC, &abs_ts); ts.tv_sec = abs_ts.tv_sec + 1; ts.tv_nsec = abs_ts.tv_nsec; io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ABS); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } gettimeofday(&tv, NULL); ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret == -EINVAL) { fprintf(stdout, "Absolute timeouts not supported, ignored\n"); return 0; } else if (ret != -ETIME) { fprintf(stderr, "Timeout: %s\n", strerror(-ret)); goto err; } exp = mtime_since_now(&tv); if (exp >= 1000 / 2 && exp <= (1000 * 3) / 2) return 0; fprintf(stderr, "%s: Timeout seems wonky (got %llu)\n", __FUNCTION__, exp); err: return 1; } /* * Test that timeout is canceled on exit */ static int test_single_timeout_exit(struct io_uring *ring) { struct io_uring_sqe *sqe; struct __kernel_timespec 
ts; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, 30000); io_uring_prep_timeout(sqe, &ts, 0, 0); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } io_uring_queue_exit(ring); return 0; err: io_uring_queue_exit(ring); return 1; } /* * Test multi timeouts waking us up */ static int test_multi_timeout(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts[2]; unsigned int timeout[2]; unsigned long long exp; struct timeval tv; int ret, i; /* req_1: timeout req, count = 1, time = (TIMEOUT_MSEC * 2) */ timeout[0] = TIMEOUT_MSEC * 2; msec_to_ts(&ts[0], timeout[0]); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts[0], 1, 0); sqe->user_data = 1; /* req_2: timeout req, count = 1, time = TIMEOUT_MSEC */ timeout[1] = TIMEOUT_MSEC; msec_to_ts(&ts[1], timeout[1]); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts[1], 1, 0); sqe->user_data = 2; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } gettimeofday(&tv, NULL); for (i = 0; i < 2; i++) { unsigned int time = 0; __u64 user_data = 0; ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } /* * Both of these two reqs should timeout, but req_2 should * return before req_1. */ switch (i) { case 0: user_data = 2; time = timeout[1]; break; case 1: user_data = 1; time = timeout[0]; break; } if (cqe->user_data != user_data) { fprintf(stderr, "%s: unexpected timeout req %d sequence\n", __FUNCTION__, i+1); goto err; } if (cqe->res != -ETIME) { fprintf(stderr, "%s: Req %d timeout: %s\n", __FUNCTION__, i+1, strerror(cqe->res)); goto err; } exp = mtime_since_now(&tv); if (exp < time / 2 || exp > (time * 3) / 2) { fprintf(stderr, "%s: Req %d timeout seems wonky (got %llu)\n", __FUNCTION__, i+1, exp); goto err; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test multi timeout req with different count */ static int test_multi_timeout_nr(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts; int ret, i; msec_to_ts(&ts, TIMEOUT_MSEC); /* req_1: timeout req, count = 2 */ sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 2, 0); sqe->user_data = 1; /* req_2: timeout req, count = 1 */ sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 1, 0); sqe->user_data = 2; /* req_3: nop req */ sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_nop(sqe); io_uring_sqe_set_data(sqe, (void *) 1); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } /* * req_2 (count=1) should return without error and req_1 (count=2) * should timeout. 
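 *
 * A rough timeline of what is verified below (the nop counts toward both
 * pending timeouts):
 *
 *	~0ms    nop completes
 *	~0ms    req_2 (count=1) completes with res == 0, satisfied early
 *	~200ms  req_1 (count=2) expires with res == -ETIME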
*/ for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } switch (i) { case 0: /* Should be nop req */ if (io_uring_cqe_get_data(cqe) != (void *) 1) { fprintf(stderr, "%s: nop not seen as 1 or 2\n", __FUNCTION__); goto err; } break; case 1: /* Should be timeout req_2 */ if (cqe->user_data != 2) { fprintf(stderr, "%s: unexpected timeout req %d sequence\n", __FUNCTION__, i+1); goto err; } if (cqe->res < 0) { fprintf(stderr, "%s: Req %d res %d\n", __FUNCTION__, i+1, cqe->res); goto err; } break; case 2: /* Should be timeout req_1 */ if (cqe->user_data != 1) { fprintf(stderr, "%s: unexpected timeout req %d sequence\n", __FUNCTION__, i+1); goto err; } if (cqe->res != -ETIME) { fprintf(stderr, "%s: Req %d timeout: %s\n", __FUNCTION__, i+1, strerror(cqe->res)); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test timeout timeout timeout */ static int test_timeout_flags1(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts; int ret, i; msec_to_ts(&ts, TIMEOUT_MSEC); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 1; sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 2; sqe->flags |= IOSQE_IO_DRAIN; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 3; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } if (cqe->res == -EINVAL) { if (!i) fprintf(stdout, "%s: timeout flags not supported\n", __FUNCTION__); io_uring_cqe_seen(ring, cqe); continue; } switch (cqe->user_data) { case 1: if (cqe->res != -ETIME) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -ETIME); goto err; } break; case 2: if (cqe->res != -ECANCELED) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -ECANCELED); goto err; } break; case 3: if (cqe->res != -ETIME) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -ETIME); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test timeout timeout timeout */ static int test_timeout_flags2(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts; int ret, i; msec_to_ts(&ts, TIMEOUT_MSEC); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 1; sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 2; sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 3; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } for (i = 0; i < 3; 
i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } if (cqe->res == -EINVAL) { if (!i) fprintf(stdout, "%s: timeout flags not supported\n", __FUNCTION__); io_uring_cqe_seen(ring, cqe); continue; } switch (cqe->user_data) { case 1: if (cqe->res != -ETIME) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -ETIME); goto err; } break; case 2: case 3: if (cqe->res != -ECANCELED) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -ECANCELED); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } /* * Test timeout timeout timeout */ static int test_timeout_flags3(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts; int ret, i; msec_to_ts(&ts, TIMEOUT_MSEC); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 1; sqe->flags |= IOSQE_IO_DRAIN; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 2; sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 3; ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } for (i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } if (cqe->res == -EINVAL) { if (!i) fprintf(stdout, "%s: timeout flags not supported\n", __FUNCTION__); io_uring_cqe_seen(ring, cqe); continue; } switch (cqe->user_data) { case 1: case 2: if (cqe->res != -ETIME) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -ETIME); goto err; } break; case 3: if (cqe->res != -ECANCELED) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -ECANCELED); goto err; } break; } io_uring_cqe_seen(ring, cqe); } return 0; err: return 1; } static int test_update_timeout(struct io_uring *ring, unsigned long ms, bool abs, bool async, bool linked) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts, ts_upd; unsigned long long exp_ms, base_ms = 10000; struct timeval tv; int ret, i, nr = 2; __u32 mode = abs ? 
IORING_TIMEOUT_ABS : 0; msec_to_ts(&ts_upd, ms); gettimeofday(&tv, NULL); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, base_ms); io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->user_data = 1; if (linked) { sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_nop(sqe); sqe->user_data = 3; sqe->flags = IOSQE_IO_LINK; if (async) sqe->flags |= IOSQE_ASYNC; nr++; } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout_update(sqe, &ts_upd, 1, mode); sqe->user_data = 2; if (async) sqe->flags |= IOSQE_ASYNC; ret = io_uring_submit(ring); if (ret != nr) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } for (i = 0; i < nr; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } switch (cqe->user_data) { case 1: if (cqe->res != -ETIME) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -ETIME); goto err; } break; case 2: if (cqe->res != 0) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, 0); goto err; } break; case 3: if (cqe->res != 0) { fprintf(stderr, "nop failed\n"); goto err; } break; default: goto err; } io_uring_cqe_seen(ring, cqe); } exp_ms = mtime_since_now(&tv); if (exp_ms >= base_ms / 2) { fprintf(stderr, "too long, timeout wasn't updated\n"); goto err; } if (ms >= 1000 && !abs && exp_ms < ms / 2) { fprintf(stderr, "fired too early, potentially updated to 0 ms" "instead of %lu\n", ms); goto err; } return 0; err: return 1; } static int test_update_nonexistent_timeout(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, 0); io_uring_prep_timeout_update(sqe, &ts, 42, 0); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; if (ret == -ENOENT) ret = 0; io_uring_cqe_seen(ring, cqe); return ret; err: return 1; } static int test_update_invalid_flags(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout_remove(sqe, 0, IORING_TIMEOUT_ABS); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } if (cqe->res != -EINVAL) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -EINVAL); goto err; } io_uring_cqe_seen(ring, cqe); sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, 0); io_uring_prep_timeout_update(sqe, &ts, 0, -1); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } if 
(cqe->res != -EINVAL) { fprintf(stderr, "%s: got %d, wanted %d\n", __FUNCTION__, cqe->res, -EINVAL); goto err; } io_uring_cqe_seen(ring, cqe); return 0; err: return 1; } static int fill_exec_target(char *dst, char *path) { struct stat sb; /* * Should either be ./exec-target.t or test/exec-target.t */ sprintf(dst, "%s", path); return stat(dst, &sb); } static int test_timeout_link_cancel(void) { struct io_uring ring; struct io_uring_cqe *cqe; char prog_path[PATH_MAX]; pid_t p; int ret, i, wstatus; if (fill_exec_target(prog_path, "./exec-target.t") && fill_exec_target(prog_path, "test/exec-target.t")) { fprintf(stdout, "Can't find exec-target, skipping\n"); return 0; } ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } p = fork(); if (p == -1) { fprintf(stderr, "fork() failed\n"); return 1; } if (p == 0) { struct io_uring_sqe *sqe; struct __kernel_timespec ts; msec_to_ts(&ts, 10000); sqe = io_uring_get_sqe(&ring); io_uring_prep_timeout(sqe, &ts, 0, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 0; sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); sqe->user_data = 1; ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "%s: got %d, wanted 1\n", __FUNCTION__, ret); exit(1); } /* trigger full cancellation */ ret = execl(prog_path, prog_path, NULL); if (ret) { fprintf(stderr, "exec failed %i\n", errno); exit(1); } exit(0); } if (waitpid(p, &wstatus, 0) == (pid_t)-1) { perror("waitpid()"); return 1; } if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus)) { fprintf(stderr, "child failed %i\n", WEXITSTATUS(wstatus)); return 1; } for (i = 0; i < 2; ++i) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) { fprintf(stderr, "wait_cqe=%d\n", ret); return 1; } if (cqe->res != -ECANCELED) { fprintf(stderr, "invalid result, user_data: %i res: %i\n", (int)cqe->user_data, cqe->res); return 1; } io_uring_cqe_seen(&ring, cqe); } io_uring_queue_exit(&ring); return 0; } static int test_not_failing_links(void) { struct io_uring ring; struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts; int ret; ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring create failed: %d\n", ret); return 1; } msec_to_ts(&ts, 1); sqe = io_uring_get_sqe(&ring); io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ETIME_SUCCESS); sqe->user_data = 1; sqe->flags |= IOSQE_IO_LINK; sqe = io_uring_get_sqe(&ring); io_uring_prep_nop(sqe); sqe->user_data = 2; ret = io_uring_submit(&ring); if (ret != 2) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); return 1; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); return 1; } else if (cqe->user_data == 1 && cqe->res == -EINVAL) { goto done; } else if (cqe->res != -ETIME || cqe->user_data != 1) { fprintf(stderr, "timeout failed %i %i\n", cqe->res, (int)cqe->user_data); return 1; } io_uring_cqe_seen(&ring, cqe); ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); return 1; } else if (cqe->res || cqe->user_data != 2) { fprintf(stderr, "nop failed %i %i\n", cqe->res, (int)cqe->user_data); return 1; } done: io_uring_cqe_seen(&ring, cqe); io_uring_queue_exit(&ring); return 0; } static int test_timeout_multishot(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } 
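	/*
	 * A multishot timeout stays armed after firing: every expiry posts a
	 * CQE with res == -ETIME and IORING_CQE_F_MORE set, until the request
	 * is removed, after which a final CQE with res == -ECANCELED arrives.
	 * Sketch of the arm/tear-down pair exercised below ('udata' being the
	 * user_data the timeout was armed with):
	 *
	 *	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_MULTISHOT);
	 *	...
	 *	io_uring_prep_timeout_remove(sqe, udata, 0);
	 *
	 * Kernels without multishot timeout support fail the request with
	 * -EINVAL, which this test treats as a skip.
	 */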
msec_to_ts(&ts, TIMEOUT_MSEC); io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_MULTISHOT); io_uring_sqe_set_data(sqe, (void *) 1); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } for (int i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; if (ret == -EINVAL) { no_multishot = 1; return T_EXIT_SKIP; } if (!(cqe->flags & IORING_CQE_F_MORE)) { fprintf(stderr, "%s: flag not set in cqe\n", __FUNCTION__); goto err; } if (ret != -ETIME) { fprintf(stderr, "%s: Timeout: %s\n", __FUNCTION__, strerror(-ret)); goto err; } io_uring_cqe_seen(ring, cqe); } sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout_remove(sqe, 1, 0); io_uring_sqe_set_data(sqe, (void *) 2); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; if (ret < 0) { fprintf(stderr, "%s: remove failed: %s\n", __FUNCTION__, strerror(-ret)); goto err; } io_uring_cqe_seen(ring, cqe); ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; if (ret != -ECANCELED) { fprintf(stderr, "%s: timeout canceled: %s %llu\n", __FUNCTION__, strerror(-ret), cqe->user_data); goto err; } io_uring_cqe_seen(ring, cqe); return 0; err: return 1; } static int test_timeout_multishot_nr(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; int ret; if (no_multishot) return T_EXIT_SKIP; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, TIMEOUT_MSEC); io_uring_prep_timeout(sqe, &ts, 3, IORING_TIMEOUT_MULTISHOT); io_uring_sqe_set_data(sqe, (void *) 1); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } for (int i = 0; i < 3; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } if (i < 2 && !(cqe->flags & IORING_CQE_F_MORE)) { fprintf(stderr, "%s: flag not set in cqe\n", __FUNCTION__); goto err; } if (i == 3 && (cqe->flags & IORING_CQE_F_MORE)) { fprintf(stderr, "%s: flag set in cqe\n", __FUNCTION__); goto err; } ret = cqe->res; if (ret != -ETIME) { fprintf(stderr, "%s: Timeout: %s\n", __FUNCTION__, strerror(-ret)); goto err; } io_uring_cqe_seen(ring, cqe); } msec_to_ts(&ts, 2 * TIMEOUT_MSEC); ret = io_uring_wait_cqe_timeout(ring, &cqe, &ts); if (ret != -ETIME) { fprintf(stderr, "%s: wait completion timeout %s\n", __FUNCTION__, strerror(-ret)); goto err; } return 0; err: return 1; } static int test_timeout_multishot_overflow(struct io_uring *ring) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; struct __kernel_timespec ts; int ret; if (no_multishot) return T_EXIT_SKIP; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } msec_to_ts(&ts, 10); io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_MULTISHOT); io_uring_sqe_set_data(sqe, (void *) 1); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } ret = 
io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; if (ret != -ETIME) { fprintf(stderr, "%s: Timeout: %s\n", __FUNCTION__, strerror(-ret)); goto err; } io_uring_cqe_seen(ring, cqe); sleep(1); if (!((*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW)) { goto err; } /* multishot timer should be gone */ sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__); goto err; } io_uring_prep_timeout_remove(sqe, 1, 0); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret != -ETIME) { fprintf(stderr, "%s: remove failed: %d %s\n", __FUNCTION__, ret, strerror(-ret)); goto err; } return 0; err: return 1; } int main(int argc, char *argv[]) { struct io_uring ring, sqpoll_ring; bool has_timeout_update, sqpoll; struct io_uring_params p = { }; int ret; if (argc > 1) return 0; ret = io_uring_queue_init_params(8, &ring, &p); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } ret = io_uring_queue_init(8, &sqpoll_ring, IORING_SETUP_SQPOLL); sqpoll = !ret; ret = test_single_timeout(&ring); if (ret) { fprintf(stderr, "test_single_timeout failed\n"); return ret; } if (not_supported) return 0; ret = test_multi_timeout(&ring); if (ret) { fprintf(stderr, "test_multi_timeout failed\n"); return ret; } ret = test_single_timeout_abs(&ring); if (ret) { fprintf(stderr, "test_single_timeout_abs failed\n"); return ret; } ret = test_single_timeout_remove(&ring); if (ret) { fprintf(stderr, "test_single_timeout_remove failed\n"); return ret; } ret = test_single_timeout_remove_notfound(&ring); if (ret) { fprintf(stderr, "test_single_timeout_remove_notfound failed\n"); return ret; } ret = test_single_timeout_many(&ring); if (ret) { fprintf(stderr, "test_single_timeout_many failed\n"); return ret; } ret = test_single_timeout_nr(&ring, 1); if (ret) { fprintf(stderr, "test_single_timeout_nr(1) failed\n"); return ret; } ret = test_single_timeout_nr(&ring, 2); if (ret) { fprintf(stderr, "test_single_timeout_nr(2) failed\n"); return ret; } ret = test_multi_timeout_nr(&ring); if (ret) { fprintf(stderr, "test_multi_timeout_nr failed\n"); return ret; } ret = test_timeout_flags1(&ring); if (ret) { fprintf(stderr, "test_timeout_flags1 failed\n"); return ret; } ret = test_timeout_flags2(&ring); if (ret) { fprintf(stderr, "test_timeout_flags2 failed\n"); return ret; } ret = test_timeout_flags3(&ring); if (ret) { fprintf(stderr, "test_timeout_flags3 failed\n"); return ret; } ret = test_timeout_multishot(&ring); if (ret && ret != T_EXIT_SKIP) { fprintf(stderr, "test_timeout_multishot failed\n"); return ret; } ret = test_timeout_multishot_nr(&ring); if (ret && ret != T_EXIT_SKIP) { fprintf(stderr, "test_timeout_multishot_nr failed\n"); return ret; } /* io_uring_wait_cqe_timeout() may have left a timeout, reinit ring */ io_uring_queue_exit(&ring); ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } ret = test_timeout_multishot_overflow(&ring); if (ret && ret != T_EXIT_SKIP) { fprintf(stderr, "test_timeout_multishot_overflow failed\n"); return ret; } /* io_uring_wait_cqe_timeout() may have left a timeout, reinit ring */ io_uring_queue_exit(&ring); ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring 
setup failed\n"); return 1; } ret = test_single_timeout_wait(&ring, &p); if (ret) { fprintf(stderr, "test_single_timeout_wait failed\n"); return ret; } /* io_uring_wait_cqes() may have left a timeout, reinit ring */ io_uring_queue_exit(&ring); ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed\n"); return 1; } ret = test_update_nonexistent_timeout(&ring); has_timeout_update = (ret != -EINVAL); if (has_timeout_update) { if (ret) { fprintf(stderr, "test_update_nonexistent_timeout failed\n"); return ret; } ret = test_update_invalid_flags(&ring); if (ret) { fprintf(stderr, "test_update_invalid_flags failed\n"); return ret; } ret = test_update_timeout(&ring, 0, false, false, false); if (ret) { fprintf(stderr, "test_update_timeout failed\n"); return ret; } ret = test_update_timeout(&ring, 1, false, false, false); if (ret) { fprintf(stderr, "test_update_timeout 1ms failed\n"); return ret; } ret = test_update_timeout(&ring, 1000, false, false, false); if (ret) { fprintf(stderr, "test_update_timeout 1s failed\n"); return ret; } ret = test_update_timeout(&ring, 0, true, true, false); if (ret) { fprintf(stderr, "test_update_timeout abs failed\n"); return ret; } ret = test_update_timeout(&ring, 0, false, true, false); if (ret) { fprintf(stderr, "test_update_timeout async failed\n"); return ret; } ret = test_update_timeout(&ring, 0, false, false, true); if (ret) { fprintf(stderr, "test_update_timeout linked failed\n"); return ret; } if (sqpoll) { ret = test_update_timeout(&sqpoll_ring, 0, false, false, false); if (ret) { fprintf(stderr, "test_update_timeout sqpoll" "failed\n"); return ret; } } } /* * this test must go last, it kills the ring */ ret = test_single_timeout_exit(&ring); if (ret) { fprintf(stderr, "test_single_timeout_exit failed\n"); return ret; } ret = test_timeout_link_cancel(); if (ret) { fprintf(stderr, "test_timeout_link_cancel failed\n"); return ret; } ret = test_not_failing_links(); if (ret) { fprintf(stderr, "test_not_failing_links failed\n"); return ret; } if (sqpoll) io_uring_queue_exit(&sqpoll_ring); return 0; } liburing-2.6/test/truncate.c000066400000000000000000000070021461424365000161500ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various truncate tests * */ #include #include #include #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define TWO_GIG_SIZE ((loff_t)2 * 1024 * 1024 * 1024) #define ONE_GIG_SIZE ((loff_t)1024 * 1024 * 1024) #define HALF_GIG_SIZE ((loff_t)512 * 1024 * 1024) static int test_truncate(struct io_uring *ring, int fd) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret = -1; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); return T_EXIT_FAIL; } memset(sqe, 0, sizeof(*sqe)); io_uring_prep_rw(IORING_OP_FTRUNCATE, sqe, fd, "fail", 0, 4); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); return T_EXIT_FAIL; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); return T_EXIT_FAIL; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); if (ret == -EINVAL) return T_EXIT_PASS; fprintf(stderr, "unexpected truncate res %d\n", ret); return T_EXIT_FAIL; } static int test_ftruncate(struct io_uring *ring, int fd, loff_t len) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } memset(sqe, 0, sizeof(*sqe)); io_uring_prep_ftruncate(sqe, 
fd, len); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; err: return 1; } static int get_file_size(int fd, loff_t *size) { struct stat st; if (fstat(fd, &st) < 0) { perror("fstat"); return -1; } if (S_ISREG(st.st_mode)) { *size = st.st_size; return 0; } else if (S_ISBLK(st.st_mode)) { unsigned long long bytes; if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) { perror("ioctl"); return -1; } *size = bytes; return 0; } return -1; } int main(int argc, char *argv[]) { struct io_uring ring; char path[32] = ".truncate.XXXXXX"; int ret; int fd; int i; loff_t size; loff_t test_sizes[3]; if (argc > 1) return T_EXIT_SKIP; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return T_EXIT_FAIL; } fd = mkostemp(path, O_WRONLY | O_CREAT | O_TRUNC); if (fd < 0) { perror("mkostemp"); return T_EXIT_FAIL; } test_sizes[0] = TWO_GIG_SIZE; test_sizes[1] = ONE_GIG_SIZE; test_sizes[2] = HALF_GIG_SIZE; for (i = 0; i < 3; i++) { ret = test_ftruncate(&ring, fd, test_sizes[i]); if (ret < 0) { if (ret == -EBADF || ret == -EINVAL) { if (i == 0) { fprintf(stdout, "Ftruncate not supported, skipping\n"); ret = T_EXIT_SKIP; goto out; } goto err; } fprintf(stderr, "ftruncate: %s\n", strerror(-ret)); goto err; } else if (ret) { fprintf(stderr, "unexpected cqe->res %d\n", ret); goto err; } if (get_file_size(fd, &size)) goto err; if (size != test_sizes[i]) { fprintf(stderr, "fail %d size=%llu, %llu\n", i, (unsigned long long) size, (unsigned long long) test_sizes[i]); goto err; } } ret = test_truncate(&ring, fd); if (ret != T_EXIT_PASS) goto err; out: unlink(path); close(fd); return T_EXIT_PASS; err: unlink(path); close(fd); return T_EXIT_FAIL; } liburing-2.6/test/tty-write-dpoll.c000066400000000000000000000022221461424365000174020ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Test double poll tty write. 
A test case for the regression fixed by: * * commit 6e295a664efd083ac9a5c1a8130c45be1db0cde7 * Author: Jens Axboe * Date: Tue Mar 22 13:11:28 2022 -0600 * * io_uring: fix assuming triggered poll waitqueue is the single poll * */ #include #include #include #include #include #include "liburing.h" #include "helpers.h" #define SQES 128 #define BUFSIZE 512 int main(int argc, char *argv[]) { static char buf[BUFSIZE]; struct iovec vecs[SQES]; struct io_uring ring; int ret, i, fd; if (argc > 1) return 0; fd = open("/dev/ttyS0", O_RDWR | O_NONBLOCK); if (fd < 0) return 0; ret = t_create_ring(SQES, &ring, 0); if (ret == T_SETUP_SKIP) return 0; else if (ret < 0) return 1; for (i = 0; i < SQES; i++) { struct io_uring_sqe *sqe; sqe = io_uring_get_sqe(&ring); vecs[i].iov_base = buf; vecs[i].iov_len = sizeof(buf); io_uring_prep_writev(sqe, fd, &vecs[i], 1, 0); } ret = io_uring_submit(&ring); if (ret != SQES) { fprintf(stderr, "submit: %d\n", ret); return 1; } return 0; } liburing-2.6/test/unlink.c000066400000000000000000000035551461424365000156340ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: run various unlink tests * */ #include #include #include #include #include #include #include #include "liburing.h" static int test_unlink(struct io_uring *ring, const char *old) { struct io_uring_cqe *cqe; struct io_uring_sqe *sqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "get sqe failed\n"); goto err; } io_uring_prep_unlink(sqe, old, 0); ret = io_uring_submit(ring); if (ret <= 0) { fprintf(stderr, "sqe submit failed: %d\n", ret); goto err; } ret = io_uring_wait_cqe(ring, &cqe); if (ret < 0) { fprintf(stderr, "wait completion %d\n", ret); goto err; } ret = cqe->res; io_uring_cqe_seen(ring, cqe); return ret; err: return 1; } static int stat_file(const char *buf) { struct stat sb; if (!stat(buf, &sb)) return 0; return errno; } int main(int argc, char *argv[]) { struct io_uring ring; char buf[32] = "./XXXXXX"; int ret; if (argc > 1) return 0; ret = io_uring_queue_init(1, &ring, 0); if (ret) { fprintf(stderr, "ring setup failed: %d\n", ret); return 1; } ret = mkstemp(buf); if (ret < 0) { perror("mkstemp"); return 1; } close(ret); if (stat_file(buf) != 0) { perror("stat"); return 1; } ret = test_unlink(&ring, buf); if (ret < 0) { if (ret == -EBADF || ret == -EINVAL) { fprintf(stdout, "Unlink not supported, skipping\n"); unlink(buf); return 0; } fprintf(stderr, "rename: %s\n", strerror(-ret)); goto err; } else if (ret) goto err; ret = stat_file(buf); if (ret != ENOENT) { fprintf(stderr, "stat got %s\n", strerror(ret)); return 1; } ret = test_unlink(&ring, "/3/2/3/1/z/y"); if (ret != -ENOENT) { fprintf(stderr, "invalid unlink got %s\n", strerror(-ret)); return 1; } return 0; err: unlink(buf); return 1; } liburing-2.6/test/version.c000066400000000000000000000011001461424365000160010ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: check version macros and runtime checks work * */ #include "liburing.h" #include "helpers.h" int main(int argc, char *argv[]) { if (IO_URING_CHECK_VERSION(io_uring_major_version(), io_uring_minor_version())) return T_EXIT_FAIL; if (io_uring_major_version() != IO_URING_VERSION_MAJOR) return T_EXIT_FAIL; if (io_uring_minor_version() != IO_URING_VERSION_MINOR) return T_EXIT_FAIL; #if IO_URING_CHECK_VERSION(IO_URING_VERSION_MAJOR, IO_URING_VERSION_MINOR) return T_EXIT_FAIL; #endif return T_EXIT_PASS; } liburing-2.6/test/waitid.c000066400000000000000000000157461461424365000156220ustar00rootroot00000000000000/* 
SPDX-License-Identifier: MIT */ /* * Description: test waitid functionality */ #include #include #include #include #include "liburing.h" #include "helpers.h" static bool no_waitid; static void child(long usleep_time) { if (usleep_time) usleep(usleep_time); exit(0); } /* * Test linked timeout with child not exiting in time */ static int test_noexit(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; struct __kernel_timespec ts; siginfo_t si; pid_t pid; int ret, i; pid = fork(); if (!pid) { child(200000); exit(0); } sqe = io_uring_get_sqe(ring); io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0); sqe->flags |= IOSQE_IO_LINK; sqe->user_data = 1; ts.tv_sec = 0; ts.tv_nsec = 100 * 1000 * 1000ULL; sqe = io_uring_get_sqe(ring); io_uring_prep_link_timeout(sqe, &ts, 0); sqe->user_data = 2; io_uring_submit(ring); for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "cqe wait: %d\n", ret); return T_EXIT_FAIL; } if (cqe->user_data == 2 && cqe->res != 1) { fprintf(stderr, "timeout res: %d\n", cqe->res); return T_EXIT_FAIL; } if (cqe->user_data == 1 && cqe->res != -ECANCELED) { fprintf(stderr, "waitid res: %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); } return T_EXIT_PASS; } /* * Test one child exiting, but not the one we were looking for */ static int test_double(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; siginfo_t si; pid_t p1, p2; int ret; /* p1 will exit shortly */ p1 = fork(); if (!p1) { child(100000); exit(0); } /* p2 will linger */ p2 = fork(); if (!p2) { child(200000); exit(0); } sqe = io_uring_get_sqe(ring); io_uring_prep_waitid(sqe, P_PID, p2, &si, WEXITED, 0); io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "cqe wait: %d\n", ret); return T_EXIT_FAIL; } if (cqe->res < 0) { fprintf(stderr, "cqe res: %d\n", cqe->res); return T_EXIT_FAIL; } if (si.si_pid != p2) { fprintf(stderr, "expected pid %d, got %d\n", p2, si.si_pid); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); return T_EXIT_PASS; } /* * Test reaping of an already exited task */ static int test_ready(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; siginfo_t si; pid_t pid; int ret; pid = fork(); if (!pid) { child(0); exit(0); } sqe = io_uring_get_sqe(ring); io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0); io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "cqe wait: %d\n", ret); return T_EXIT_FAIL; } if (cqe->res < 0) { fprintf(stderr, "cqe res: %d\n", cqe->res); return T_EXIT_FAIL; } if (si.si_pid != pid) { fprintf(stderr, "expected pid %d, got %d\n", pid, si.si_pid); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); return T_EXIT_PASS; } /* * Test cancelation of pending waitid */ static int test_cancel(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, i; pid_t pid; pid = fork(); if (!pid) { child(20000); exit(0); } sqe = io_uring_get_sqe(ring); io_uring_prep_waitid(sqe, P_PID, pid, NULL, WEXITED, 0); sqe->user_data = 1; io_uring_submit(ring); sqe = io_uring_get_sqe(ring); io_uring_prep_cancel64(sqe, 1, 0); sqe->user_data = 2; io_uring_submit(ring); for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "cqe wait: %d\n", ret); return T_EXIT_FAIL; } if (cqe->user_data == 1 && cqe->res != -ECANCELED) { fprintf(stderr, "cqe res: %d\n", cqe->res); return T_EXIT_FAIL; } if (cqe->user_data == 2 && cqe->res != 1) { fprintf(stderr, "cqe 
res: %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); } return T_EXIT_PASS; } /* * Test cancelation of pending waitid, with expected races that either * waitid trigger or cancelation will win. */ static int test_cancel_race(struct io_uring *ring, int async) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret, i; pid_t pid; for (i = 0; i < 10; i++) { pid = fork(); if (!pid) { child(getpid() & 1); exit(0); } } sqe = io_uring_get_sqe(ring); io_uring_prep_waitid(sqe, P_ALL, -1, NULL, WEXITED, 0); if (async) sqe->flags |= IOSQE_ASYNC; sqe->user_data = 1; io_uring_submit(ring); sqe = io_uring_get_sqe(ring); io_uring_prep_cancel64(sqe, 1, 0); sqe->user_data = 2; usleep(1); io_uring_submit(ring); for (i = 0; i < 2; i++) { ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "cqe wait: %d\n", ret); return T_EXIT_FAIL; } if (cqe->user_data == 1 && !(cqe->res == -ECANCELED || cqe->res == 0)) { fprintf(stderr, "cqe1 res: %d\n", cqe->res); return T_EXIT_FAIL; } if (cqe->user_data == 2 && !(cqe->res == 1 || cqe->res == 0 || cqe->res == -ENOENT || cqe->res == -EALREADY)) { fprintf(stderr, "cqe2 res: %d\n", cqe->res); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); } return T_EXIT_PASS; } /* * Test basic reap of child exit */ static int test(struct io_uring *ring) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; siginfo_t si; pid_t pid; int ret; pid = fork(); if (!pid) { child(100); exit(0); } sqe = io_uring_get_sqe(ring); io_uring_prep_waitid(sqe, P_PID, pid, &si, WEXITED, 0); io_uring_submit(ring); ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "cqe wait: %d\n", ret); return T_EXIT_FAIL; } /* no waitid support */ if (cqe->res == -EINVAL) { no_waitid = true; return T_EXIT_SKIP; } if (cqe->res < 0) { fprintf(stderr, "cqe res: %d\n", cqe->res); return T_EXIT_FAIL; } if (si.si_pid != pid) { fprintf(stderr, "expected pid %d, got %d\n", pid, si.si_pid); return T_EXIT_FAIL; } io_uring_cqe_seen(ring, cqe); return T_EXIT_PASS; } int main(int argc, char *argv[]) { struct io_uring ring; int ret, i; if (argc > 1) return T_EXIT_SKIP; io_uring_queue_init(8, &ring, 0); ret = test(&ring); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test failed\n"); return T_EXIT_FAIL; } if (no_waitid) return T_EXIT_SKIP; ret = test_noexit(&ring); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_noexit failed\n"); return T_EXIT_FAIL; } ret = test_noexit(&ring); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_noexit failed\n"); return T_EXIT_FAIL; } ret = test_double(&ring); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_double failed\n"); return T_EXIT_FAIL; } ret = test_ready(&ring); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_ready failed\n"); return T_EXIT_FAIL; } ret = test_cancel(&ring); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_cancel failed\n"); return T_EXIT_FAIL; } for (i = 0; i < 1000; i++) { ret = test_cancel_race(&ring, i & 1); if (ret == T_EXIT_FAIL) { fprintf(stderr, "test_cancel_race failed\n"); return T_EXIT_FAIL; } } io_uring_queue_exit(&ring); return T_EXIT_PASS; } liburing-2.6/test/wakeup-hang.c000066400000000000000000000056021461424365000165360ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include #include "liburing.h" struct thread_data { struct io_uring *ring; int write_fd; }; static void error_exit(char *message) { perror(message); exit(1); } static void *listener_thread(void *data) { struct thread_data *td = data; struct io_uring_cqe *cqe; int ret; 
ret = io_uring_wait_cqe(td->ring, &cqe); if (ret < 0) { fprintf(stderr, "Error waiting for completion: %s\n", strerror(-ret)); goto err; } if (cqe->res < 0) { fprintf(stderr, "Error in async operation: %s\n", strerror(-cqe->res)); goto err; } io_uring_cqe_seen(td->ring, cqe); return NULL; err: return (void *) 1; } static void *wakeup_io_uring(void *data) { struct thread_data *td = data; int res; res = eventfd_write(td->write_fd, (eventfd_t) 1L); if (res < 0) { perror("eventfd_write"); return (void *) 1; } return NULL; } static int test_pipes(void) { struct io_uring_sqe *sqe; struct thread_data td; struct io_uring ring; pthread_t t1, t2; int ret, fds[2]; void *pret; if (pipe(fds) < 0) error_exit("pipe"); ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "Unable to setup io_uring: %s\n", strerror(-ret)); return 1; } td.write_fd = fds[1]; td.ring = &ring; sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, fds[0], POLLIN); sqe->user_data = 2; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "ring_submit=%d\n", ret); return 1; } pthread_create(&t1, NULL, listener_thread, &td); sleep(1); pthread_create(&t2, NULL, wakeup_io_uring, &td); pthread_join(t1, &pret); io_uring_queue_exit(&ring); return pret != NULL; } static int test_eventfd(void) { struct io_uring_sqe *sqe; struct thread_data td; struct io_uring ring; pthread_t t1, t2; int efd, ret; void *pret; efd = eventfd(0, 0); if (efd < 0) error_exit("eventfd"); ret = io_uring_queue_init(8, &ring, 0); if (ret) { fprintf(stderr, "Unable to setup io_uring: %s\n", strerror(-ret)); return 1; } td.write_fd = efd; td.ring = &ring; sqe = io_uring_get_sqe(&ring); io_uring_prep_poll_add(sqe, efd, POLLIN); sqe->user_data = 2; ret = io_uring_submit(&ring); if (ret != 1) { fprintf(stderr, "ring_submit=%d\n", ret); return 1; } pthread_create(&t1, NULL, listener_thread, &td); sleep(1); pthread_create(&t2, NULL, wakeup_io_uring, &td); pthread_join(t1, &pret); io_uring_queue_exit(&ring); return pret != NULL; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return 0; ret = test_pipes(); if (ret) { fprintf(stderr, "test_pipes failed\n"); return ret; } ret = test_eventfd(); if (ret) { fprintf(stderr, "test_eventfd failed\n"); return ret; } return 0; } liburing-2.6/test/wq-aff.c000066400000000000000000000051641461424365000155130ustar00rootroot00000000000000/* SPDX-License-Identifier: MIT */ /* * Description: test that io-wq affinity is correctly set for SQPOLL */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <fcntl.h> #include <sched.h> #include "liburing.h" #include "helpers.h" #define IOWQ_CPU 0 #define SQPOLL_CPU 1 static int verify_comm(pid_t pid, const char *name, int cpu) { char comm[64], buf[64]; cpu_set_t set; int fd, ret; sprintf(comm, "/proc/%d/comm", pid); fd = open(comm, O_RDONLY); if (fd < 0) { perror("open"); return T_EXIT_SKIP; } ret = read(fd, buf, sizeof(buf)); if (ret < 0) { close(fd); return T_EXIT_SKIP; } if (strncmp(buf, name, strlen(name) - 1)) { close(fd); return T_EXIT_SKIP; } close(fd); ret = sched_getaffinity(pid, sizeof(set), &set); if (ret < 0) { perror("sched_getaffinity"); return T_EXIT_SKIP; } if (CPU_COUNT(&set) != 1) { fprintf(stderr, "More than one CPU set in mask\n"); return T_EXIT_FAIL; } if (!CPU_ISSET(cpu, &set)) { fprintf(stderr, "Wrong CPU set in mask\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } static int verify_affinity(pid_t pid, int sqpoll) { pid_t wq_pid, sqpoll_pid = -1; char name[64]; int ret; wq_pid = pid + 2; if (sqpoll) sqpoll_pid = pid + 1; /* verify we had the pids right */ sprintf(name, "iou-wrk-%d",
pid); ret = verify_comm(wq_pid, name, IOWQ_CPU); if (ret != T_EXIT_PASS) return ret; if (sqpoll_pid != -1) { sprintf(name, "iou-sqp-%d", pid); ret = verify_comm(sqpoll_pid, name, SQPOLL_CPU); if (ret != T_EXIT_PASS) return ret; } return T_EXIT_PASS; } static int test(int sqpoll) { struct io_uring_params p = { }; struct io_uring ring; struct io_uring_sqe *sqe; char buf[64]; int fds[2], ret; cpu_set_t set; if (sqpoll) { p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF; p.sq_thread_cpu = SQPOLL_CPU; } io_uring_queue_init_params(8, &ring, &p); CPU_ZERO(&set); CPU_SET(IOWQ_CPU, &set); ret = io_uring_register_iowq_aff(&ring, sizeof(set), &set); if (ret) { fprintf(stderr, "register aff: %d\n", ret); return T_EXIT_FAIL; } if (pipe(fds) < 0) { perror("pipe"); return T_EXIT_FAIL; } sqe = io_uring_get_sqe(&ring); io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0); sqe->flags |= IOSQE_ASYNC; io_uring_submit(&ring); usleep(10000); ret = verify_affinity(getpid(), sqpoll); io_uring_queue_exit(&ring); return ret; } int main(int argc, char *argv[]) { int ret; if (argc > 1) return T_EXIT_SKIP; ret = test(1); if (ret == T_EXIT_SKIP) { return T_EXIT_SKIP; } else if (ret != T_EXIT_PASS) { fprintf(stderr, "test sqpoll failed\n"); return T_EXIT_FAIL; } return T_EXIT_PASS; } liburing-2.6/test/xattr.c000066400000000000000000000243501461424365000154720ustar00rootroot00000000000000#include #include #include #include #include #include #include #include "helpers.h" #include "liburing.h" static int no_xattr; /* Define constants. */ #define XATTR_SIZE 255 #define QUEUE_DEPTH 32 #define FILENAME "xattr.test" #define KEY1 "user.val1" #define KEY2 "user.val2" #define VALUE1 "value1" #define VALUE2 "value2-a-lot-longer" /* Call fsetxattr. */ static int io_uring_fsetxattr(struct io_uring *ring, int fd, const char *name, const void *value, size_t size, int flags) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "Error cannot get sqe\n"); return -1; } io_uring_prep_fsetxattr(sqe, fd, name, value, flags, size); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "Error io_uring_submit_and_wait: ret=%d\n", ret); return -1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "Error io_uring_wait_cqe: ret=%d\n", ret); return -1; } ret = cqe->res; if (ret < 0) { if (cqe->res == -EINVAL || cqe->res == -EOPNOTSUPP) no_xattr = 1; } io_uring_cqe_seen(ring, cqe); return ret; } /* Submit fgetxattr request. */ static int io_uring_fgetxattr(struct io_uring *ring, int fd, const char *name, void *value, size_t size) { struct io_uring_sqe *sqe; struct io_uring_cqe *cqe; int ret; sqe = io_uring_get_sqe(ring); if (!sqe) { fprintf(stderr, "Error cannot get sqe\n"); return -1; } io_uring_prep_fgetxattr(sqe, fd, name, value, size); ret = io_uring_submit(ring); if (ret != 1) { fprintf(stderr, "Error io_uring_submit_and_wait: ret=%d\n", ret); return -1; } ret = io_uring_wait_cqe(ring, &cqe); if (ret) { fprintf(stderr, "Error io_uring_wait_cqe: ret=%d\n", ret); return -1; } ret = cqe->res; if (ret == -1) { fprintf(stderr, "Error couldn'tget value\n"); return -1; } io_uring_cqe_seen(ring, cqe); return ret; } /* Call setxattr. 
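 *
 * Note the argument order of the prep helper used below: it is
 * (sqe, name, value, path, flags, len), with the attribute name ahead of
 * the file path. A sketch of setting a single attribute:
 *
 *	sqe = io_uring_get_sqe(ring);
 *	io_uring_prep_setxattr(sqe, "user.val1", "value1", "xattr.test",
 *			       0, strlen("value1"));
 *	io_uring_submit_and_wait(ring, 1);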
/* Call setxattr. */
static int io_uring_setxattr(struct io_uring *ring, const char *path,
			     const char *name, const void *value, size_t size,
			     int flags)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		fprintf(stderr, "Error cannot get sqe\n");
		return -1;
	}

	io_uring_prep_setxattr(sqe, name, value, path, flags, size);

	ret = io_uring_submit_and_wait(ring, 1);
	if (ret != 1) {
		fprintf(stderr, "Error io_uring_submit_and_wait: ret=%d\n", ret);
		return -1;
	}

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "Error io_uring_wait_cqe: ret=%d\n", ret);
		return -1;
	}
	ret = cqe->res;
	if (ret < 0) {
		if (ret == -EINVAL || ret == -EOPNOTSUPP)
			no_xattr = 1;
	}
	io_uring_cqe_seen(ring, cqe);

	return ret;
}

/* Submit getxattr request. */
static int io_uring_getxattr(struct io_uring *ring, const char *path,
			     const char *name, void *value, size_t size)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		fprintf(stderr, "Error cannot get sqe\n");
		return -1;
	}

	io_uring_prep_getxattr(sqe, name, value, path, size);

	ret = io_uring_submit(ring);
	if (ret != 1) {
		fprintf(stderr, "Error io_uring_submit: ret=%d\n", ret);
		return -1;
	}

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "Error io_uring_wait_cqe: ret=%d\n", ret);
		return -1;
	}
	ret = cqe->res;
	if (ret == -1) {
		fprintf(stderr, "Error couldn't get value\n");
		return -1;
	}
	io_uring_cqe_seen(ring, cqe);

	return ret;
}

/* Test driver for fsetxattr and fgetxattr. */
static int test_fxattr(void)
{
	int rc = 0;
	size_t value_len;
	struct io_uring ring;
	char value[XATTR_SIZE];

	/* Init io-uring queue. */
	int ret = io_uring_queue_init(QUEUE_DEPTH, &ring, 0);
	if (ret) {
		fprintf(stderr, "child: ring setup failed: %d\n", ret);
		return -1;
	}

	/* Create the test file. */
	int fd = open(FILENAME, O_CREAT | O_RDWR, 0644);
	if (fd < 0) {
		fprintf(stderr, "Error: cannot open file: ret=%d\n", fd);
		return -1;
	}

	/* Test writing attributes. */
	if (io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, strlen(VALUE1), 0) < 0) {
		if (no_xattr) {
			fprintf(stdout, "No xattr support, skipping\n");
			goto Exit;
		}
		fprintf(stderr, "Error fsetxattr cannot write key1\n");
		rc = -1;
		goto Exit;
	}
	if (io_uring_fsetxattr(&ring, fd, KEY2, VALUE2, strlen(VALUE2), 0) < 0) {
		fprintf(stderr, "Error fsetxattr cannot write key2\n");
		rc = -1;
		goto Exit;
	}

	/* Test reading attributes. */
	value_len = io_uring_fgetxattr(&ring, fd, KEY1, value, XATTR_SIZE);
	if (value_len != strlen(VALUE1) || strncmp(value, VALUE1, value_len)) {
		fprintf(stderr, "Error: fgetxattr expected value: %s, returned value: %s\n",
			VALUE1, value);
		rc = -1;
		goto Exit;
	}

	value_len = io_uring_fgetxattr(&ring, fd, KEY2, value, XATTR_SIZE);
	if (value_len != strlen(VALUE2) || strncmp(value, VALUE2, value_len)) {
		fprintf(stderr, "Error: fgetxattr expected value: %s, returned value: %s\n",
			VALUE2, value);
		rc = -1;
		goto Exit;
	}

	/* Cleanup. */
Exit:
	close(fd);
	unlink(FILENAME);
	io_uring_queue_exit(&ring);
	return rc;
}
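/*
 * test_xattr() mirrors test_fxattr() above, but exercises the path-based
 * setxattr/getxattr variants on a file created with t_create_file(). It is
 * only reached once test_fxattr() has confirmed xattr support: main() exits
 * early with EXIT_SUCCESS when no_xattr is set.
 */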
/* Test driver for setxattr and getxattr. */
static int test_xattr(void)
{
	int rc = 0;
	int value_len;
	struct io_uring ring;
	char value[XATTR_SIZE];

	/* Init io-uring queue. */
	int ret = io_uring_queue_init(QUEUE_DEPTH, &ring, 0);
	if (ret) {
		fprintf(stderr, "child: ring setup failed: %d\n", ret);
		return -1;
	}

	/* Create the test file. */
	t_create_file(FILENAME, 0);

	/* Test writing attributes. */
	if (io_uring_setxattr(&ring, FILENAME, KEY1, VALUE1, strlen(VALUE1), 0) < 0) {
		fprintf(stderr, "Error setxattr cannot write key1\n");
		rc = -1;
		goto Exit;
	}
	if (io_uring_setxattr(&ring, FILENAME, KEY2, VALUE2, strlen(VALUE2), 0) < 0) {
		fprintf(stderr, "Error setxattr cannot write key2\n");
		rc = -1;
		goto Exit;
	}

	/* Test reading attributes. */
	value_len = io_uring_getxattr(&ring, FILENAME, KEY1, value, XATTR_SIZE);
	if (value_len != strlen(VALUE1) || strncmp(value, VALUE1, value_len)) {
		fprintf(stderr, "Error: getxattr expected value: %s, returned value: %s\n",
			VALUE1, value);
		rc = -1;
		goto Exit;
	}

	value_len = io_uring_getxattr(&ring, FILENAME, KEY2, value, XATTR_SIZE);
	if (value_len != strlen(VALUE2) || strncmp(value, VALUE2, value_len)) {
		fprintf(stderr, "Error: getxattr expected value: %s, returned value: %s\n",
			VALUE2, value);
		rc = -1;
		goto Exit;
	}

	/* Cleanup. */
Exit:
	io_uring_queue_exit(&ring);
	unlink(FILENAME);
	return rc;
}

/* Test driver for failure cases of fsetxattr and fgetxattr. */
static int test_failure_fxattr(void)
{
	struct io_uring ring;
	char value[XATTR_SIZE];

	/* Init io-uring queue. */
	int ret = io_uring_queue_init(QUEUE_DEPTH, &ring, 0);
	if (ret) {
		fprintf(stderr, "child: ring setup failed: %d\n", ret);
		return -1;
	}

	/* Create the test file. */
	int fd = open(FILENAME, O_CREAT | O_RDWR, 0644);
	if (fd < 0) {
		fprintf(stderr, "Error: cannot open file: ret=%d\n", fd);
		return -1;
	}

	/* Test writing attributes. */
	if (io_uring_fsetxattr(&ring, -1, KEY1, VALUE1, strlen(VALUE1), 0) >= 0)
		return 1;
	if (io_uring_fsetxattr(&ring, fd, NULL, VALUE1, strlen(VALUE1), 0) >= 0)
		return 1;
	if (io_uring_fsetxattr(&ring, fd, KEY1, NULL, strlen(VALUE1), 0) >= 0)
		return 1;
	if (io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, 0, 0) != 0)
		return 1;
	if (io_uring_fsetxattr(&ring, fd, KEY1, VALUE1, -1, 0) >= 0)
		return 1;

	/* Test reading attributes. */
	if (io_uring_fgetxattr(&ring, -1, KEY1, value, XATTR_SIZE) >= 0)
		return 1;
	if (io_uring_fgetxattr(&ring, fd, NULL, value, XATTR_SIZE) >= 0)
		return 1;
	if (io_uring_fgetxattr(&ring, fd, KEY1, value, 0) != 0)
		return 1;

	/* Cleanup. */
	close(fd);
	unlink(FILENAME);
	io_uring_queue_exit(&ring);
	return 0;
}
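/*
 * As with the fd-based failure cases above, test_failure_xattr() expects
 * every invalid submission (garbage or NULL path, NULL name or value) to
 * complete with a negative res, while a zero-length set or get is expected
 * to succeed and return 0.
 */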
/* Test driver for failure cases for setxattr and getxattr. */
static int test_failure_xattr(void)
{
	struct io_uring ring;
	char value[XATTR_SIZE];

	/* Init io-uring queue. */
	int ret = io_uring_queue_init(QUEUE_DEPTH, &ring, 0);
	if (ret) {
		fprintf(stderr, "child: ring setup failed: %d\n", ret);
		return -1;
	}

	/* Create the test file. */
	t_create_file(FILENAME, 0);

	/* Test writing attributes. */
	if (io_uring_setxattr(&ring, "complete garbage", KEY1, VALUE1,
			      strlen(VALUE1), 0) >= 0)
		return 1;
	if (io_uring_setxattr(&ring, NULL, KEY1, VALUE1, strlen(VALUE1), 0) >= 0)
		return 1;
	if (io_uring_setxattr(&ring, FILENAME, NULL, VALUE1, strlen(VALUE1), 0) >= 0)
		return 1;
	if (io_uring_setxattr(&ring, FILENAME, KEY1, NULL, strlen(VALUE1), 0) >= 0)
		return 1;
	if (io_uring_setxattr(&ring, FILENAME, KEY1, VALUE1, 0, 0) != 0)
		return 1;

	/* Test reading attributes. */
	if (io_uring_getxattr(&ring, "complete garbage", KEY1, value,
			      XATTR_SIZE) >= 0)
		return 1;
	if (io_uring_getxattr(&ring, NULL, KEY1, value, XATTR_SIZE) >= 0)
		return 1;
	if (io_uring_getxattr(&ring, FILENAME, NULL, value, XATTR_SIZE) >= 0)
		return 1;
	if (io_uring_getxattr(&ring, FILENAME, KEY1, NULL, XATTR_SIZE) != 0)
		return 1;
	if (io_uring_getxattr(&ring, FILENAME, KEY1, value, 0) != 0)
		return 1;

	/* Cleanup. */
	io_uring_queue_exit(&ring);
	unlink(FILENAME);
	return 0;
}

/* Test for invalid SQE, this will cause a segmentation fault if enabled. */
static int test_invalid_sqe(void)
{
#ifdef DESTRUCTIVE_TEST
	struct io_uring_sqe *sqe = NULL;
	struct io_uring_cqe *cqe = NULL;
	struct io_uring ring;

	/* Init io-uring queue. */
	int ret = io_uring_queue_init(QUEUE_DEPTH, &ring, 0);
	if (ret) {
		fprintf(stderr, "child: ring setup failed: %d\n", ret);
		return -1;
	}

	/* Pass invalid SQE. */
	io_uring_prep_setxattr(sqe, FILENAME, KEY1, VALUE1, strlen(VALUE1), 0);

	ret = io_uring_submit(&ring);
	if (ret != 1) {
		fprintf(stderr, "Error io_uring_submit: ret=%d\n", ret);
		return -1;
	}

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret) {
		fprintf(stderr, "Error io_uring_wait_cqe: ret=%d\n", ret);
		return -1;
	}
	ret = cqe->res;
	io_uring_cqe_seen(&ring, cqe);

	return ret;
#else
	return 0;
#endif
}

/* Test driver. */
int main(int argc, char *argv[])
{
	if (argc > 1)
		return 0;

	if (test_fxattr())
		return EXIT_FAILURE;
	if (no_xattr)
		return EXIT_SUCCESS;
	if (test_xattr() || test_failure_fxattr() || test_failure_xattr() ||
	    test_invalid_sqe())
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}